mloda-0.3.0-py3-none-any.whl → mloda-0.3.2-py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between the versions exactly as they appear in their public registries.
Files changed (92)
  1. {mloda-0.3.0.dist-info → mloda-0.3.2.dist-info}/METADATA +10 -10
  2. {mloda-0.3.0.dist-info → mloda-0.3.2.dist-info}/RECORD +92 -91
  3. mloda_core/abstract_plugins/components/base_artifact.py +3 -1
  4. mloda_core/abstract_plugins/components/feature.py +4 -4
  5. mloda_core/abstract_plugins/components/feature_chainer/feature_chain_parser.py +44 -17
  6. mloda_core/abstract_plugins/components/feature_collection.py +2 -2
  7. mloda_core/abstract_plugins/components/feature_group_version.py +4 -4
  8. mloda_core/abstract_plugins/components/feature_name.py +0 -3
  9. mloda_core/abstract_plugins/components/input_data/base_input_data.py +3 -3
  10. mloda_core/abstract_plugins/components/link.py +113 -29
  11. mloda_core/abstract_plugins/components/options.py +10 -10
  12. mloda_core/api/prepare/setup_compute_framework.py +2 -2
  13. mloda_core/api/request.py +44 -13
  14. mloda_core/core/step/feature_group_step.py +2 -1
  15. mloda_core/filter/filter_engine.py +3 -12
  16. mloda_core/filter/filter_parameter.py +55 -0
  17. mloda_core/filter/single_filter.py +4 -4
  18. mloda_core/prepare/execution_plan.py +12 -6
  19. mloda_core/prepare/graph/graph.py +3 -3
  20. mloda_core/prepare/identify_feature_group.py +10 -3
  21. mloda_core/prepare/resolve_links.py +86 -18
  22. mloda_core/runtime/flight/flight_server.py +1 -1
  23. mloda_core/runtime/run.py +7 -5
  24. mloda_core/runtime/worker/multiprocessing_worker.py +11 -9
  25. mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_filter_engine.py +7 -33
  26. mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_pyarrow_transformer.py +1 -1
  27. mloda_plugins/compute_framework/base_implementations/iceberg/iceberg_filter_engine.py +22 -12
  28. mloda_plugins/compute_framework/base_implementations/iceberg/iceberg_framework.py +2 -2
  29. mloda_plugins/compute_framework/base_implementations/iceberg/iceberg_pyarrow_transformer.py +2 -2
  30. mloda_plugins/compute_framework/base_implementations/pandas/dataframe.py +2 -2
  31. mloda_plugins/compute_framework/base_implementations/pandas/pandaspyarrowtransformer.py +1 -1
  32. mloda_plugins/compute_framework/base_implementations/polars/dataframe.py +3 -3
  33. mloda_plugins/compute_framework/base_implementations/polars/lazy_dataframe.py +5 -5
  34. mloda_plugins/compute_framework/base_implementations/polars/polars_filter_engine.py +8 -34
  35. mloda_plugins/compute_framework/base_implementations/polars/polars_lazy_merge_engine.py +1 -1
  36. mloda_plugins/compute_framework/base_implementations/polars/polars_lazy_pyarrow_transformer.py +3 -3
  37. mloda_plugins/compute_framework/base_implementations/polars/polars_merge_engine.py +1 -1
  38. mloda_plugins/compute_framework/base_implementations/polars/polars_pyarrow_transformer.py +2 -2
  39. mloda_plugins/compute_framework/base_implementations/pyarrow/pyarrow_filter_engine.py +7 -33
  40. mloda_plugins/compute_framework/base_implementations/pyarrow/table.py +1 -1
  41. mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_filter_engine.py +13 -32
  42. mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_framework.py +1 -1
  43. mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_pyarrow_transformer.py +1 -1
  44. mloda_plugins/compute_framework/base_implementations/spark/spark_filter_engine.py +13 -32
  45. mloda_plugins/compute_framework/base_implementations/spark/spark_framework.py +4 -4
  46. mloda_plugins/compute_framework/base_implementations/spark/spark_pyarrow_transformer.py +1 -1
  47. mloda_plugins/config/feature/loader.py +12 -18
  48. mloda_plugins/feature_group/experimental/aggregated_feature_group/base.py +20 -17
  49. mloda_plugins/feature_group/experimental/aggregated_feature_group/pandas.py +8 -8
  50. mloda_plugins/feature_group/experimental/aggregated_feature_group/polars_lazy.py +8 -8
  51. mloda_plugins/feature_group/experimental/aggregated_feature_group/pyarrow.py +7 -7
  52. mloda_plugins/feature_group/experimental/clustering/base.py +26 -26
  53. mloda_plugins/feature_group/experimental/clustering/pandas.py +31 -29
  54. mloda_plugins/feature_group/experimental/data_quality/missing_value/base.py +23 -22
  55. mloda_plugins/feature_group/experimental/data_quality/missing_value/pandas.py +16 -16
  56. mloda_plugins/feature_group/experimental/data_quality/missing_value/pyarrow.py +9 -11
  57. mloda_plugins/feature_group/experimental/data_quality/missing_value/python_dict.py +8 -8
  58. mloda_plugins/feature_group/experimental/default_options_key.py +1 -1
  59. mloda_plugins/feature_group/experimental/dimensionality_reduction/base.py +17 -15
  60. mloda_plugins/feature_group/experimental/dimensionality_reduction/pandas.py +30 -18
  61. mloda_plugins/feature_group/experimental/dynamic_feature_group_factory/dynamic_feature_group_factory.py +35 -35
  62. mloda_plugins/feature_group/experimental/forecasting/base.py +39 -29
  63. mloda_plugins/feature_group/experimental/forecasting/pandas.py +18 -18
  64. mloda_plugins/feature_group/experimental/geo_distance/base.py +18 -20
  65. mloda_plugins/feature_group/experimental/geo_distance/pandas.py +2 -2
  66. mloda_plugins/feature_group/experimental/llm/cli_features/refactor_git_cached.py +6 -6
  67. mloda_plugins/feature_group/experimental/llm/installed_packages_feature_group.py +2 -2
  68. mloda_plugins/feature_group/experimental/llm/list_directory_feature_group.py +2 -2
  69. mloda_plugins/feature_group/experimental/llm/llm_api/llm_base_request.py +2 -2
  70. mloda_plugins/feature_group/experimental/llm/llm_api/request_loop.py +3 -2
  71. mloda_plugins/feature_group/experimental/llm/llm_file_selector.py +1 -1
  72. mloda_plugins/feature_group/experimental/node_centrality/base.py +8 -12
  73. mloda_plugins/feature_group/experimental/node_centrality/pandas.py +2 -2
  74. mloda_plugins/feature_group/experimental/sklearn/encoding/base.py +11 -12
  75. mloda_plugins/feature_group/experimental/sklearn/encoding/pandas.py +2 -2
  76. mloda_plugins/feature_group/experimental/sklearn/pipeline/base.py +9 -14
  77. mloda_plugins/feature_group/experimental/sklearn/pipeline/pandas.py +2 -2
  78. mloda_plugins/feature_group/experimental/sklearn/scaling/base.py +8 -9
  79. mloda_plugins/feature_group/experimental/sklearn/scaling/pandas.py +2 -2
  80. mloda_plugins/feature_group/experimental/source_input_feature.py +10 -10
  81. mloda_plugins/feature_group/experimental/text_cleaning/base.py +8 -11
  82. mloda_plugins/feature_group/experimental/text_cleaning/pandas.py +2 -2
  83. mloda_plugins/feature_group/experimental/time_window/base.py +27 -25
  84. mloda_plugins/feature_group/experimental/time_window/pandas.py +8 -8
  85. mloda_plugins/feature_group/experimental/time_window/pyarrow.py +6 -6
  86. mloda_plugins/feature_group/input_data/read_context_files.py +1 -1
  87. mloda_plugins/function_extender/base_implementations/otel/otel_extender.py +1 -1
  88. {mloda-0.3.0.dist-info → mloda-0.3.2.dist-info}/WHEEL +0 -0
  89. {mloda-0.3.0.dist-info → mloda-0.3.2.dist-info}/entry_points.txt +0 -0
  90. {mloda-0.3.0.dist-info → mloda-0.3.2.dist-info}/licenses/LICENSE.TXT +0 -0
  91. {mloda-0.3.0.dist-info → mloda-0.3.2.dist-info}/licenses/NOTICE.md +0 -0
  92. {mloda-0.3.0.dist-info → mloda-0.3.2.dist-info}/top_level.txt +0 -0
mloda_plugins/compute_framework/base_implementations/polars/polars_lazy_pyarrow_transformer.py

@@ -5,7 +5,7 @@ from mloda_core.abstract_plugins.components.framework_transformer.base_transform
 try:
     import polars as pl
 except ImportError:
-    pl = None  # type: ignore
+    pl = None  # type: ignore[assignment]
 
 try:
     import pyarrow as pa
@@ -13,7 +13,7 @@ except ImportError:
     pa = None
 
 
-class PolarsLazyPyarrowTransformer(BaseTransformer):
+class PolarsLazyPyArrowTransformer(BaseTransformer):
     """
     Transformer for converting between Polars LazyFrame and PyArrow Table.
 
@@ -66,4 +66,4 @@ class PolarsLazyPyarrowTransformer(BaseTransformer):
             raise ImportError("Polars is not installed. To be able to use this framework, please install polars.")
         # Convert PyArrow to DataFrame, then make it lazy
         df = pl.from_arrow(data)
-        return df.lazy()  # type: ignore
+        return df.lazy()  # type: ignore[union-attr]
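A pattern that recurs throughout this release: bare `# type: ignore` comments are narrowed to error-code-scoped ones (`[assignment]`, `[union-attr]`, `[no-any-return]`). A minimal sketch of the optional-import idiom these annotations guard, taken directly from the hunks above:

```python
# Optional-dependency import pattern used across the compute-framework
# modules. Scoping the ignore to mypy's "assignment" error code silences
# only the None fallback; any other error on that line still surfaces.
try:
    import polars as pl
except ImportError:
    pl = None  # type: ignore[assignment]
```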
mloda_plugins/compute_framework/base_implementations/polars/polars_merge_engine.py

@@ -7,7 +7,7 @@ from mloda_core.abstract_plugins.components.merge.base_merge_engine import BaseM
 try:
     import polars as pl
 except ImportError:
-    pl = None  # type: ignore
+    pl = None  # type: ignore[assignment]
 
 
 class PolarsMergeEngine(BaseMergeEngine):
mloda_plugins/compute_framework/base_implementations/polars/polars_pyarrow_transformer.py

@@ -5,7 +5,7 @@ from mloda_core.abstract_plugins.components.framework_transformer.base_transform
 try:
     import polars as pl
 except ImportError:
-    pl = None  # type: ignore
+    pl = None  # type: ignore[assignment]
 
 try:
     import pyarrow as pa
@@ -13,7 +13,7 @@ except ImportError:
     pa = None
 
 
-class PolarsPyarrowTransformer(BaseTransformer):
+class PolarsPyArrowTransformer(BaseTransformer):
     """
     Transformer for converting between Polars DataFrame and PyArrow Table.
 
mloda_plugins/compute_framework/base_implementations/pyarrow/pyarrow_filter_engine.py

@@ -40,11 +40,7 @@ class PyArrowFilterEngine(BaseFilterEngine):
         column_name = str(filter_feature.name)
 
         # Extract the value from the parameter
-        value = None
-        for param in filter_feature.parameter:
-            if param[0] == "value":
-                value = param[1]
-                break
+        value = filter_feature.parameter.value
 
         if value is None:
             raise ValueError(f"Filter parameter 'value' not found in {filter_feature.parameter}")
@@ -59,14 +55,8 @@ class PyArrowFilterEngine(BaseFilterEngine):
         column_name = str(filter_feature.name)
 
         # Check if this is a complex parameter with max/max_exclusive or a simple one with value
-        has_max = False
-        has_value = False
-
-        for param in filter_feature.parameter:
-            if param[0] == "max":
-                has_max = True
-            elif param[0] == "value":
-                has_value = True
+        has_max = filter_feature.parameter.max_value is not None
+        has_value = filter_feature.parameter.value is not None
 
         if has_max:
             # Complex parameter - use get_min_max_operator
@@ -90,11 +80,7 @@ class PyArrowFilterEngine(BaseFilterEngine):
             return data.filter(mask)
         elif has_value:
             # Simple parameter - extract the value
-            value = None
-            for param in filter_feature.parameter:
-                if param[0] == "value":
-                    value = param[1]
-                    break
+            value = filter_feature.parameter.value
 
             if value is None:
                 raise ValueError(f"Filter parameter 'value' not found in {filter_feature.parameter}")
@@ -111,11 +97,7 @@ class PyArrowFilterEngine(BaseFilterEngine):
         column_name = str(filter_feature.name)
 
         # Extract the value from the parameter
-        value = None
-        for param in filter_feature.parameter:
-            if param[0] == "value":
-                value = param[1]
-                break
+        value = filter_feature.parameter.value
 
         if value is None:
             raise ValueError(f"Filter parameter 'value' not found in {filter_feature.parameter}")
@@ -130,11 +112,7 @@ class PyArrowFilterEngine(BaseFilterEngine):
         column_name = str(filter_feature.name)
 
         # Extract the value from the parameter
-        value = None
-        for param in filter_feature.parameter:
-            if param[0] == "value":
-                value = param[1]
-                break
+        value = filter_feature.parameter.value
 
         if value is None:
             raise ValueError(f"Filter parameter 'value' not found in {filter_feature.parameter}")
@@ -151,11 +129,7 @@ class PyArrowFilterEngine(BaseFilterEngine):
         column_name = str(filter_feature.name)
 
         # Extract the values from the parameter
-        values = None
-        for param in filter_feature.parameter:
-            if param[0] == "values":
-                values = param[1]
-                break
+        values = filter_feature.parameter.values
 
         if values is None:
             raise ValueError(f"Filter parameter 'values' not found in {filter_feature.parameter}")
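Every filter engine in this release drops the tuple-scanning loop over `filter_feature.parameter` in favor of attribute access (`.value`, `.values`, `.max_value`), presumably backed by the new `mloda_core/filter/filter_parameter.py` (+55 lines, not shown in this diff). A hedged sketch of what such a wrapper could look like; the class name and constructor below are assumptions, and only the three attributes are actually implied by the call sites above:

```python
# Hypothetical sketch of the accessor that filter_parameter.py appears to
# introduce. Only .value, .values and .max_value are implied by the new
# call sites; everything else here is an assumption.
from typing import Any, Iterator, Optional, Tuple


class FilterParameter:
    """Wraps the old tuple-based parameter set behind named attributes."""

    def __init__(self, params: Tuple[Tuple[str, Any], ...]) -> None:
        self._params = dict(params)

    def __iter__(self) -> Iterator[Tuple[str, Any]]:
        # Keeps the old (key, value) iteration protocol working for any
        # callers not yet migrated.
        return iter(self._params.items())

    @property
    def value(self) -> Optional[Any]:
        return self._params.get("value")

    @property
    def values(self) -> Optional[Any]:
        return self._params.get("values")

    @property
    def max_value(self) -> Optional[Any]:
        # The old tuple key was "max"; the attribute name comes from the
        # new call sites (filter_feature.parameter.max_value).
        return self._params.get("max")
```

With something of this shape in place, each engine's five-line scan collapses to a single `value = filter_feature.parameter.value`, which is exactly the form of the new call sites above and below.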
mloda_plugins/compute_framework/base_implementations/pyarrow/table.py

@@ -16,7 +16,7 @@ except ImportError:
     pd = None
 
 
-class PyarrowTable(ComputeFrameWork):
+class PyArrowTable(ComputeFrameWork):
     @staticmethod
     def is_available() -> bool:
         """Check if PyArrow is installed and available."""
mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_filter_engine.py

@@ -45,11 +45,8 @@ class PythonDictFilterEngine(BaseFilterEngine):
         column_name = filter_feature.name
 
         # Extract the value from the parameter
-        value = None
-        for param in filter_feature.parameter:
-            if param[0] == "value":
-                value = param[1]
-                break
+
+        value = filter_feature.parameter.value
 
         if value is None:
             raise ValueError(f"Filter parameter 'value' not found in {filter_feature.parameter}")
@@ -61,14 +58,10 @@ class PythonDictFilterEngine(BaseFilterEngine):
         column_name = filter_feature.name
 
         # Check if this is a complex parameter with max/max_exclusive or a simple one with value
-        has_max = False
-        has_value = False
 
-        for param in filter_feature.parameter:
-            if param[0] == "max":
-                has_max = True
-            elif param[0] == "value":
-                has_value = True
+        has_max = filter_feature.parameter.max_value is not None
+
+        has_value = filter_feature.parameter.value is not None
 
         if has_max:
             # Complex parameter - use get_min_max_operator
@@ -94,11 +87,8 @@ class PythonDictFilterEngine(BaseFilterEngine):
             ]
         elif has_value:
             # Simple parameter - extract the value
-            value = None
-            for param in filter_feature.parameter:
-                if param[0] == "value":
-                    value = param[1]
-                    break
+
+            value = filter_feature.parameter.value
 
             if value is None:
                 raise ValueError(f"Filter parameter 'value' not found in {filter_feature.parameter}")
@@ -112,11 +102,8 @@ class PythonDictFilterEngine(BaseFilterEngine):
         column_name = filter_feature.name
 
         # Extract the value from the parameter
-        value = None
-        for param in filter_feature.parameter:
-            if param[0] == "value":
-                value = param[1]
-                break
+
+        value = filter_feature.parameter.value
 
         if value is None:
             raise ValueError(f"Filter parameter 'value' not found in {filter_feature.parameter}")
@@ -128,11 +115,8 @@ class PythonDictFilterEngine(BaseFilterEngine):
         column_name = filter_feature.name
 
         # Extract the value from the parameter
-        value = None
-        for param in filter_feature.parameter:
-            if param[0] == "value":
-                value = param[1]
-                break
+
+        value = filter_feature.parameter.value
 
         if value is None:
             raise ValueError(f"Filter parameter 'value' not found in {filter_feature.parameter}")
@@ -151,11 +135,8 @@ class PythonDictFilterEngine(BaseFilterEngine):
         column_name = filter_feature.name
 
         # Extract the values from the parameter
-        values = None
-        for param in filter_feature.parameter:
-            if param[0] == "values":
-                values = param[1]
-                break
+
+        values = filter_feature.parameter.values
 
         if values is None:
             raise ValueError(f"Filter parameter 'values' not found in {filter_feature.parameter}")
mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_framework.py

@@ -82,7 +82,7 @@ class PythonDictFramework(ComputeFrameWork):
 
         transformed_data = self.apply_compute_framework_transformer(data)
         if transformed_data is not None:
-            return transformed_data  # type: ignore
+            return transformed_data  # type: ignore[no-any-return]
 
         if isinstance(data, dict):
             """Initial data: Transform columnar dict to row-based list of dicts"""
mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_pyarrow_transformer.py

@@ -8,7 +8,7 @@ except ImportError:
     pa = None
 
 
-class PythonDictPyarrowTransformer(BaseTransformer):
+class PythonDictPyArrowTransformer(BaseTransformer):
     """
     Transformer for converting between PythonDict (List[Dict]) and PyArrow Table.
 
mloda_plugins/compute_framework/base_implementations/spark/spark_filter_engine.py

@@ -37,11 +37,8 @@ class SparkFilterEngine(BaseFilterEngine):
         column_name = filter_feature.name.name
 
         # Extract the value from the parameter
-        value = None
-        for param in filter_feature.parameter:
-            if param[0] == "value":
-                value = param[1]
-                break
+
+        value = filter_feature.parameter.value
 
         if value is None:
             raise ValueError(f"Filter parameter 'value' not found in {filter_feature.parameter}")
@@ -53,14 +50,10 @@ class SparkFilterEngine(BaseFilterEngine):
         column_name = filter_feature.name.name
 
         # Check if this is a complex parameter with max/max_exclusive or a simple one with value
-        has_max = False
-        has_value = False
 
-        for param in filter_feature.parameter:
-            if param[0] == "max":
-                has_max = True
-            elif param[0] == "value":
-                has_value = True
+        has_max = filter_feature.parameter.max_value is not None
+
+        has_value = filter_feature.parameter.value is not None
 
         if has_max:
             # Complex parameter - use get_min_max_operator
@@ -82,11 +75,8 @@ class SparkFilterEngine(BaseFilterEngine):
             condition = F.col(column_name) <= max_parameter
         elif has_value:
             # Simple parameter - extract the value
-            value = None
-            for param in filter_feature.parameter:
-                if param[0] == "value":
-                    value = param[1]
-                    break
+
+            value = filter_feature.parameter.value
 
             if value is None:
                 raise ValueError(f"Filter parameter 'value' not found in {filter_feature.parameter}")
@@ -102,11 +92,8 @@ class SparkFilterEngine(BaseFilterEngine):
         column_name = filter_feature.name.name
 
         # Extract the value from the parameter
-        value = None
-        for param in filter_feature.parameter:
-            if param[0] == "value":
-                value = param[1]
-                break
+
+        value = filter_feature.parameter.value
 
         if value is None:
             raise ValueError(f"Filter parameter 'value' not found in {filter_feature.parameter}")
@@ -118,11 +105,8 @@ class SparkFilterEngine(BaseFilterEngine):
         column_name = filter_feature.name.name
 
         # Extract the value from the parameter
-        value = None
-        for param in filter_feature.parameter:
-            if param[0] == "value":
-                value = param[1]
-                break
+
+        value = filter_feature.parameter.value
 
         if value is None:
             raise ValueError(f"Filter parameter 'value' not found in {filter_feature.parameter}")
@@ -135,11 +119,8 @@ class SparkFilterEngine(BaseFilterEngine):
         column_name = filter_feature.name.name
 
         # Extract the values from the parameter
-        values = None
-        for param in filter_feature.parameter:
-            if param[0] == "values":
-                values = param[1]
-                break
+
+        values = filter_feature.parameter.values
 
         if values is None:
             raise ValueError(f"Filter parameter 'values' not found in {filter_feature.parameter}")
mloda_plugins/compute_framework/base_implementations/spark/spark_framework.py

@@ -119,14 +119,14 @@ class SparkFramework(ComputeFrameWork):
 
         # Handle empty dict
         if not data:
-            return spark.createDataFrame([], StructType([]))  # type: ignore
+            return spark.createDataFrame([], StructType([]))  # type: ignore[union-attr]
 
         # Infer schema from the first row of data
         first_key = next(iter(data.keys()))
         if not data[first_key]:  # Empty list
             schema_fields = [StructField(col, StringType(), True) for col in data.keys()]
             schema = StructType(schema_fields)
-            return spark.createDataFrame([], schema)  # type: ignore
+            return spark.createDataFrame([], schema)  # type: ignore[union-attr]
 
         # Create schema based on first values
         schema_fields = []
@@ -146,9 +146,9 @@ class SparkFramework(ComputeFrameWork):
             for i in range(num_rows):
                 row = tuple(data[col][i] for col in data.keys())
                 rows.append(row)
-            return spark.createDataFrame(rows, schema)  # type: ignore
+            return spark.createDataFrame(rows, schema)  # type: ignore[union-attr]
         else:
-            return spark.createDataFrame([], schema)  # type: ignore
+            return spark.createDataFrame([], schema)  # type: ignore[union-attr]
 
         if hasattr(data, "__iter__") and not isinstance(data, (str, bytes, DataFrame)):
             """Added data: Add column to DataFrame"""
mloda_plugins/compute_framework/base_implementations/spark/spark_pyarrow_transformer.py

@@ -14,7 +14,7 @@ except ImportError:
     pa = None
 
 
-class SparkPyarrowTransformer(BaseTransformer):
+class SparkPyArrowTransformer(BaseTransformer):
     """
     Transformer for converting between Spark DataFrame and PyArrow Table.
 
mloda_plugins/config/feature/loader.py

@@ -14,7 +14,7 @@ from mloda_plugins.feature_group.experimental.default_options_key import Default
 
 
 def process_nested_features(options: Dict[str, Any]) -> Dict[str, Any]:
-    """Recursively convert nested mloda_source_features dicts to Feature objects.
+    """Recursively convert nested in_features dicts to Feature objects.
 
     Args:
         options: Dictionary of options that may contain nested feature definitions
@@ -24,11 +24,11 @@ def process_nested_features(options: Dict[str, Any]) -> Dict[str, Any]:
     """
     processed: Dict[str, Any] = {}
     for key, value in options.items():
-        if key == "mloda_source_features" and isinstance(value, dict):
+        if key == "in_features" and isinstance(value, dict):
             # This is a nested feature definition - convert it to a Feature object
            feature_name = value.get("name")
             if not feature_name:
-                raise ValueError(f"Nested mloda_source_features must have a 'name' field: {value}")
+                raise ValueError(f"Nested in_features must have a 'name' field: {value}")
 
             # Recursively process nested options
             nested_options = value.get("options", {})
@@ -39,17 +39,15 @@ def process_nested_features(options: Dict[str, Any]) -> Dict[str, Any]:
             if mloda_sources:
                 if isinstance(mloda_sources, list):
                     # For list, convert each to string (single sources) or keep as-is
-                    processed_nested_options["mloda_source_features"] = (
+                    processed_nested_options["in_features"] = (
                         mloda_sources if len(mloda_sources) > 1 else mloda_sources[0]
                     )
                 elif isinstance(mloda_sources, dict):
                     # Recursively create Feature for mloda_sources
-                    mloda_source_features = process_nested_features({"mloda_source_features": mloda_sources})[
-                        "mloda_source_features"
-                    ]
-                    processed_nested_options["mloda_source_features"] = mloda_source_features
+                    in_features = process_nested_features({"in_features": mloda_sources})["in_features"]
+                    processed_nested_options["in_features"] = in_features
                 else:
-                    processed_nested_options["mloda_source_features"] = mloda_sources
+                    processed_nested_options["in_features"] = mloda_sources
 
             # Create the Feature object
             processed[key] = Feature(name=feature_name, options=processed_nested_options)
@@ -104,7 +102,7 @@ def load_features_from_config(config_str: str, format: str = "json") -> List[Uni
         # Handle mloda_sources if present
         if item.mloda_sources:
             # Always convert to frozenset for consistency
-            context[DefaultOptionKeys.mloda_source_features] = frozenset(item.mloda_sources)
+            context[DefaultOptionKeys.in_features] = frozenset(item.mloda_sources)
         options = Options(group=item.group_options or {}, context=context)
         feature = Feature(name=feature_name, options=options)
         features.append(feature)
@@ -115,9 +113,7 @@ def load_features_from_config(config_str: str, format: str = "json") -> List[Uni
             processed_options = process_nested_features(item.options)
             # Always convert to frozenset for consistency (even single items)
             source_value = frozenset(item.mloda_sources)
-            options = Options(
-                group=processed_options, context={DefaultOptionKeys.mloda_source_features: source_value}
-            )
+            options = Options(group=processed_options, context={DefaultOptionKeys.in_features: source_value})
             feature = Feature(name=feature_name, options=options)
             features.append(feature)
             feature_registry[feature_name] = feature
@@ -133,16 +129,14 @@ def load_features_from_config(config_str: str, format: str = "json") -> List[Uni
     # Pass 2: Resolve @feature_name references to Feature objects
     for feat in features:
         if isinstance(feat, Feature):
-            mloda_source = feat.options.context.get(DefaultOptionKeys.mloda_source_features)
+            mloda_source = feat.options.context.get(DefaultOptionKeys.in_features)
             if mloda_source:
                 # Handle both single string and frozenset of strings
                 if isinstance(mloda_source, str) and mloda_source.startswith("@"):
                     # Single reference string
                     referenced_name = mloda_source[1:]
                     if referenced_name in feature_registry:
-                        feat.options.context[DefaultOptionKeys.mloda_source_features] = feature_registry[
-                            referenced_name
-                        ]
+                        feat.options.context[DefaultOptionKeys.in_features] = feature_registry[referenced_name]
                     else:
                         raise ValueError(f"Feature reference '@{referenced_name}' not found in configuration")
                 elif isinstance(mloda_source, frozenset):
@@ -159,6 +153,6 @@ def load_features_from_config(config_str: str, format: str = "json") -> List[Uni
                             resolved_sources.append(source)
                     # Only replace if we actually resolved any references
                     if any(isinstance(s, str) and s.startswith("@") for s in mloda_source):
-                        feat.options.context[DefaultOptionKeys.mloda_source_features] = frozenset(resolved_sources)
+                        feat.options.context[DefaultOptionKeys.in_features] = frozenset(resolved_sources)
 
     return features
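The loader changes rename the option key from `mloda_source_features` to `DefaultOptionKeys.in_features` while keeping the two-pass `@feature_name` reference resolution. A hedged usage sketch: `load_features_from_config(config_str, format="json")` and the `@` reference syntax are confirmed by the code above, but the JSON field names below (`name`, `mloda_sources`) are inferred from the `item` attributes the loader reads, and the exact top-level schema is an assumption:

```python
from mloda_plugins.config.feature.loader import load_features_from_config

# Two features: "sales" and an aggregation that references it via "@sales".
# Pass 1 registers both by name; pass 2 replaces "@sales" with the
# registered Feature object under DefaultOptionKeys.in_features
# (previously DefaultOptionKeys.mloda_source_features).
config = """
[
    {"name": "sales"},
    {"name": "sales__sum_aggr", "mloda_sources": ["@sales"]}
]
"""

features = load_features_from_config(config, format="json")
```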
mloda_plugins/feature_group/experimental/aggregated_feature_group/base.py

@@ -4,6 +4,7 @@ Base implementation for aggregated feature groups.
 
 from __future__ import annotations
 
+from abc import abstractmethod
 from typing import Any, List, Optional, Set, Union
 
 from mloda_core.abstract_plugins.abstract_feature_group import AbstractFeatureGroup
@@ -40,7 +41,7 @@ class AggregatedFeatureGroup(AbstractFeatureGroup):
 
     ### 1. String-Based Creation
 
-    Features follow the naming pattern: `{mloda_source_features}__{aggregation_type}_aggr`
+    Features follow the naming pattern: `{in_features}__{aggregation_type}_aggr`
 
     Examples:
     ```python
@@ -62,7 +63,7 @@ class AggregatedFeatureGroup(AbstractFeatureGroup):
         options=Options(
             context={
                 AggregatedFeatureGroup.AGGREGATION_TYPE: "sum",
-                DefaultOptionKeys.mloda_source_features: "sales",
+                DefaultOptionKeys.in_features: "sales",
             }
         )
     )
@@ -73,7 +74,7 @@ class AggregatedFeatureGroup(AbstractFeatureGroup):
     ### Context Parameters (Default)
     These parameters don't affect Feature Group resolution/splitting:
     - `aggregation_type`: The type of aggregation to perform
-    - `mloda_source_features`: The source feature to aggregate
+    - `in_features`: The source feature to aggregate
 
     ### Group Parameters
     Currently none for AggregatedFeatureGroup. Parameters that affect Feature Group
@@ -96,7 +97,6 @@ class AggregatedFeatureGroup(AbstractFeatureGroup):
         "median": "Median value",
     }
 
-    PATTERN = "__"
     PREFIX_PATTERN = r".*__([\w]+)_aggr$"
 
     # Property mapping for configuration-based feature creation
@@ -106,7 +106,7 @@ class AggregatedFeatureGroup(AbstractFeatureGroup):
             DefaultOptionKeys.mloda_context: True,  # Mark as context parameter
             DefaultOptionKeys.mloda_strict_validation: True,  # Enable strict validation
         },
-        DefaultOptionKeys.mloda_source_features: {
+        DefaultOptionKeys.in_features: {
            "explanation": "Source feature to aggregate",
             DefaultOptionKeys.mloda_context: True,  # Mark as context parameter
             DefaultOptionKeys.mloda_strict_validation: False,  # Flexible validation
@@ -119,12 +119,12 @@ class AggregatedFeatureGroup(AbstractFeatureGroup):
         source_feature: str | None = None
 
         # string based
-        _, source_feature = FeatureChainParser.parse_feature_name(feature_name, self.PATTERN, [self.PREFIX_PATTERN])
+        _, source_feature = FeatureChainParser.parse_feature_name(feature_name, [self.PREFIX_PATTERN])
         if source_feature is not None:
             return {Feature(source_feature)}
 
         # configuration based
-        source_features = options.get_source_features()
+        source_features = options.get_in_features()
         if len(source_features) != 1:
             raise ValueError(
                 f"Expected exactly one source feature, but found {len(source_features)}: {source_features}"
@@ -134,7 +134,7 @@ class AggregatedFeatureGroup(AbstractFeatureGroup):
     @classmethod
     def get_aggregation_type(cls, feature_name: str) -> str:
         """Extract the aggregation type from the feature name."""
-        prefix_part, _ = FeatureChainParser.parse_feature_name(feature_name, cls.PATTERN, [cls.PREFIX_PATTERN])
+        prefix_part, _ = FeatureChainParser.parse_feature_name(feature_name, [cls.PREFIX_PATTERN])
         if prefix_part is None:
             raise ValueError(f"Could not extract aggregation type from feature name: {feature_name}")
         return prefix_part
@@ -153,7 +153,6 @@ class AggregatedFeatureGroup(AbstractFeatureGroup):
             feature_name,
             options,
             property_mapping=cls.PROPERTY_MAPPING,
-            pattern=cls.PATTERN,
             prefix_patterns=[cls.PREFIX_PATTERN],
         )
 
@@ -178,13 +177,13 @@ class AggregatedFeatureGroup(AbstractFeatureGroup):
 
         # string based
         aggregation_type, source_feature_name = FeatureChainParser.parse_feature_name(
-            feature.name, cls.PATTERN, [cls.PREFIX_PATTERN]
+            feature.name, [cls.PREFIX_PATTERN]
         )
         if aggregation_type is not None and source_feature_name is not None:
             return aggregation_type, source_feature_name
 
         # configuration based
-        source_features = feature.options.get_source_features()
+        source_features = feature.options.get_in_features()
         source_feature = next(iter(source_features))
         source_feature_name = source_feature.get_name()
 
@@ -243,6 +242,7 @@ class AggregatedFeatureGroup(AbstractFeatureGroup):
         return data
 
     @classmethod
+    @abstractmethod
     def _get_available_columns(cls, data: Any) -> Set[str]:
         """
         Get the set of available column names from the data.
@@ -253,9 +253,10 @@ class AggregatedFeatureGroup(AbstractFeatureGroup):
         Returns:
             Set of column names available in the data
         """
-        raise NotImplementedError(f"_get_available_columns not implemented in {cls.__name__}")
+        ...
 
     @classmethod
+    @abstractmethod
     def _check_source_features_exist(cls, data: Any, feature_names: List[str]) -> None:
         """
         Check if the resolved source features exist in the data.
@@ -267,9 +268,10 @@ class AggregatedFeatureGroup(AbstractFeatureGroup):
         Raises:
             ValueError: If none of the features exist in the data
         """
-        raise NotImplementedError(f"_check_source_features_exist not implemented in {cls.__name__}")
+        ...
 
     @classmethod
+    @abstractmethod
     def _add_result_to_data(cls, data: Any, feature_name: str, result: Any) -> Any:
         """
         Add the result to the data.
@@ -282,10 +284,11 @@ class AggregatedFeatureGroup(AbstractFeatureGroup):
         Returns:
             The updated data
         """
-        raise NotImplementedError(f"_add_result_to_data not implemented in {cls.__name__}")
+        ...
 
     @classmethod
-    def _perform_aggregation(cls, data: Any, aggregation_type: str, mloda_source_features: List[str]) -> Any:
+    @abstractmethod
+    def _perform_aggregation(cls, data: Any, aggregation_type: str, in_features: List[str]) -> Any:
         """
         Method to perform the aggregation. Should be implemented by subclasses.
 
@@ -296,9 +299,9 @@ class AggregatedFeatureGroup(AbstractFeatureGroup):
         Args:
             data: The input data
             aggregation_type: The type of aggregation to perform
-            mloda_source_features: List of resolved source feature names to aggregate
+            in_features: List of resolved source feature names to aggregate
 
         Returns:
             The result of the aggregation
         """
-        raise NotImplementedError(f"_perform_aggregation not implemented in {cls.__name__}")
+        ...
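Two contract changes land here: the four backend hooks are now declared `@abstractmethod` with `...` bodies instead of `raise NotImplementedError(...)`, making the subclass contract explicit to type checkers and readers rather than failing only when an unimplemented hook is called, and the class-level `PATTERN = "__"` constant is gone because `FeatureChainParser.parse_feature_name` now takes only the prefix-pattern list. A minimal sketch of a backend filling in the hooks; the toy columnar-dict logic is ours, only the four method names and signatures come from the diff, and any further requirements AbstractFeatureGroup imposes are ignored here:

```python
# Hypothetical minimal backend for the four hooks that base.py now marks
# @abstractmethod. `data` is assumed to be a dict of column name -> list.
from typing import Any, List, Set

from mloda_plugins.feature_group.experimental.aggregated_feature_group.base import (
    AggregatedFeatureGroup,
)


class DictAggregatedFeatureGroup(AggregatedFeatureGroup):
    @classmethod
    def _get_available_columns(cls, data: Any) -> Set[str]:
        return set(data.keys())

    @classmethod
    def _check_source_features_exist(cls, data: Any, feature_names: List[str]) -> None:
        if not any(name in data for name in feature_names):
            raise ValueError(f"None of {feature_names} found in data")

    @classmethod
    def _add_result_to_data(cls, data: Any, feature_name: str, result: Any) -> Any:
        data[feature_name] = result
        return data

    @classmethod
    def _perform_aggregation(cls, data: Any, aggregation_type: str, in_features: List[str]) -> Any:
        # Pool the values of all resolved source features, then aggregate.
        values = [v for name in in_features for v in data[name]]
        if aggregation_type == "sum":
            return sum(values)
        raise ValueError(f"Unsupported aggregation type: {aggregation_type}")
```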