mloda 0.3.2__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (201)
  1. mloda/__init__.py +17 -0
  2. {mloda_core → mloda/core}/abstract_plugins/components/base_artifact.py +2 -2
  3. {mloda_core → mloda/core}/abstract_plugins/components/base_validator.py +13 -0
  4. {mloda_core → mloda/core}/abstract_plugins/components/data_access_collection.py +1 -1
  5. {mloda_core → mloda/core}/abstract_plugins/components/data_types.py +39 -0
  6. {mloda_core → mloda/core}/abstract_plugins/components/feature.py +39 -33
  7. {mloda_core → mloda/core}/abstract_plugins/components/feature_chainer/feature_chain_parser.py +19 -19
  8. mloda/core/abstract_plugins/components/feature_chainer/feature_chain_parser_mixin.py +197 -0
  9. {mloda_core → mloda/core}/abstract_plugins/components/feature_collection.py +6 -6
  10. {mloda_core → mloda/core}/abstract_plugins/components/feature_group_version.py +8 -8
  11. {mloda_core → mloda/core}/abstract_plugins/components/feature_set.py +18 -24
  12. {mloda_core → mloda/core}/abstract_plugins/components/framework_transformer/cfw_transformer.py +2 -2
  13. {mloda_core → mloda/core}/abstract_plugins/components/index/add_index_feature.py +4 -4
  14. {mloda_core → mloda/core}/abstract_plugins/components/input_data/api/api_input_data.py +3 -3
  15. {mloda_core → mloda/core}/abstract_plugins/components/input_data/api/api_input_data_collection.py +2 -2
  16. {mloda_core → mloda/core}/abstract_plugins/components/input_data/api/base_api_data.py +1 -1
  17. {mloda_core → mloda/core}/abstract_plugins/components/input_data/base_input_data.py +6 -6
  18. {mloda_core → mloda/core}/abstract_plugins/components/input_data/creator/data_creator.py +3 -3
  19. mloda/core/abstract_plugins/components/link.py +437 -0
  20. {mloda_core → mloda/core}/abstract_plugins/components/match_data/match_data.py +3 -3
  21. {mloda_core → mloda/core}/abstract_plugins/components/merge/base_merge_engine.py +2 -2
  22. {mloda_core → mloda/core}/abstract_plugins/components/options.py +12 -36
  23. {mloda_core → mloda/core}/abstract_plugins/components/parallelization_modes.py +1 -1
  24. {mloda_core → mloda/core}/abstract_plugins/components/plugin_option/plugin_collector.py +14 -14
  25. mloda/core/abstract_plugins/components/validators/datatype_validator.py +96 -0
  26. mloda/core/abstract_plugins/components/validators/feature_set_validator.py +38 -0
  27. mloda/core/abstract_plugins/components/validators/feature_validator.py +23 -0
  28. mloda/core/abstract_plugins/components/validators/link_validator.py +79 -0
  29. mloda/core/abstract_plugins/components/validators/options_validator.py +57 -0
  30. mloda_core/abstract_plugins/compute_frame_work.py → mloda/core/abstract_plugins/compute_framework.py +46 -37
  31. mloda_core/abstract_plugins/abstract_feature_group.py → mloda/core/abstract_plugins/feature_group.py +56 -33
  32. mloda/core/abstract_plugins/function_extender.py +78 -0
  33. mloda/core/api/plugin_docs.py +220 -0
  34. mloda/core/api/plugin_info.py +32 -0
  35. {mloda_core → mloda/core}/api/prepare/setup_compute_framework.py +11 -11
  36. {mloda_core → mloda/core}/api/request.py +42 -33
  37. {mloda_core → mloda/core}/core/cfw_manager.py +8 -8
  38. {mloda_core → mloda/core}/core/engine.py +47 -46
  39. {mloda_core → mloda/core}/core/step/abstract_step.py +7 -7
  40. {mloda_core → mloda/core}/core/step/feature_group_step.py +12 -12
  41. {mloda_core → mloda/core}/core/step/join_step.py +14 -14
  42. {mloda_core → mloda/core}/core/step/transform_frame_work_step.py +16 -16
  43. {mloda_core → mloda/core}/filter/filter_engine.py +1 -1
  44. {mloda_core → mloda/core}/filter/filter_type_enum.py +1 -1
  45. {mloda_core → mloda/core}/filter/global_filter.py +23 -23
  46. {mloda_core → mloda/core}/filter/single_filter.py +6 -6
  47. {mloda_core → mloda/core}/prepare/accessible_plugins.py +16 -18
  48. {mloda_core → mloda/core}/prepare/execution_plan.py +65 -39
  49. {mloda_core → mloda/core}/prepare/graph/build_graph.py +6 -6
  50. {mloda_core → mloda/core}/prepare/graph/graph.py +1 -1
  51. {mloda_core → mloda/core}/prepare/graph/properties.py +5 -5
  52. {mloda_core → mloda/core}/prepare/identify_feature_group.py +12 -14
  53. {mloda_core → mloda/core}/prepare/joinstep_collection.py +3 -3
  54. {mloda_core → mloda/core}/prepare/resolve_compute_frameworks.py +6 -6
  55. {mloda_core → mloda/core}/prepare/resolve_graph.py +11 -11
  56. {mloda_core → mloda/core}/prepare/resolve_links.py +31 -40
  57. mloda/core/prepare/validators/resolve_link_validator.py +32 -0
  58. mloda/core/runtime/compute_framework_executor.py +271 -0
  59. mloda/core/runtime/data_lifecycle_manager.py +160 -0
  60. mloda/core/runtime/flight/__init__.py +0 -0
  61. {mloda_core → mloda/core}/runtime/flight/runner_flight_server.py +1 -1
  62. mloda/core/runtime/run.py +317 -0
  63. mloda/core/runtime/worker/__init__.py +0 -0
  64. {mloda_core → mloda/core}/runtime/worker/multiprocessing_worker.py +15 -10
  65. {mloda_core → mloda/core}/runtime/worker/thread_worker.py +2 -2
  66. mloda/core/runtime/worker_manager.py +96 -0
  67. mloda/provider/__init__.py +101 -0
  68. mloda/steward/__init__.py +25 -0
  69. mloda/user/__init__.py +57 -0
  70. {mloda-0.3.2.dist-info → mloda-0.4.0.dist-info}/METADATA +18 -22
  71. mloda-0.4.0.dist-info/RECORD +248 -0
  72. {mloda-0.3.2.dist-info → mloda-0.4.0.dist-info}/top_level.txt +1 -1
  73. mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_filter_engine.py +2 -2
  74. mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_framework.py +15 -13
  75. mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_merge_engine.py +3 -3
  76. mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_pyarrow_transformer.py +1 -1
  77. mloda_plugins/compute_framework/base_implementations/iceberg/iceberg_filter_engine.py +2 -2
  78. mloda_plugins/compute_framework/base_implementations/iceberg/iceberg_framework.py +12 -10
  79. mloda_plugins/compute_framework/base_implementations/iceberg/iceberg_pyarrow_transformer.py +1 -1
  80. mloda_plugins/compute_framework/base_implementations/pandas/dataframe.py +18 -16
  81. mloda_plugins/compute_framework/base_implementations/pandas/pandas_filter_engine.py +36 -13
  82. mloda_plugins/compute_framework/base_implementations/pandas/pandas_merge_engine.py +7 -7
  83. mloda_plugins/compute_framework/base_implementations/pandas/pandaspyarrowtransformer.py +1 -1
  84. mloda_plugins/compute_framework/base_implementations/polars/dataframe.py +16 -14
  85. mloda_plugins/compute_framework/base_implementations/polars/lazy_dataframe.py +13 -12
  86. mloda_plugins/compute_framework/base_implementations/polars/polars_filter_engine.py +2 -2
  87. mloda_plugins/compute_framework/base_implementations/polars/polars_lazy_pyarrow_transformer.py +1 -1
  88. mloda_plugins/compute_framework/base_implementations/polars/polars_merge_engine.py +3 -3
  89. mloda_plugins/compute_framework/base_implementations/polars/polars_pyarrow_transformer.py +1 -1
  90. mloda_plugins/compute_framework/base_implementations/pyarrow/pyarrow_filter_engine.py +2 -2
  91. mloda_plugins/compute_framework/base_implementations/pyarrow/pyarrow_merge_engine.py +3 -3
  92. mloda_plugins/compute_framework/base_implementations/pyarrow/table.py +12 -10
  93. mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_filter_engine.py +2 -2
  94. mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_framework.py +11 -9
  95. mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_merge_engine.py +3 -3
  96. mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_pyarrow_transformer.py +1 -1
  97. mloda_plugins/compute_framework/base_implementations/spark/spark_filter_engine.py +2 -2
  98. mloda_plugins/compute_framework/base_implementations/spark/spark_framework.py +17 -15
  99. mloda_plugins/compute_framework/base_implementations/spark/spark_merge_engine.py +3 -3
  100. mloda_plugins/compute_framework/base_implementations/spark/spark_pyarrow_transformer.py +1 -1
  101. mloda_plugins/config/feature/loader.py +2 -2
  102. mloda_plugins/feature_group/experimental/aggregated_feature_group/base.py +45 -62
  103. mloda_plugins/feature_group/experimental/aggregated_feature_group/pandas.py +2 -2
  104. mloda_plugins/feature_group/experimental/aggregated_feature_group/polars_lazy.py +2 -2
  105. mloda_plugins/feature_group/experimental/aggregated_feature_group/pyarrow.py +2 -2
  106. mloda_plugins/feature_group/experimental/clustering/base.py +69 -97
  107. mloda_plugins/feature_group/experimental/clustering/pandas.py +2 -2
  108. mloda_plugins/feature_group/experimental/data_quality/missing_value/base.py +58 -79
  109. mloda_plugins/feature_group/experimental/data_quality/missing_value/pandas.py +2 -2
  110. mloda_plugins/feature_group/experimental/data_quality/missing_value/pyarrow.py +2 -2
  111. mloda_plugins/feature_group/experimental/data_quality/missing_value/python_dict.py +2 -2
  112. mloda_plugins/feature_group/experimental/default_options_key.py +16 -19
  113. mloda_plugins/feature_group/experimental/dimensionality_reduction/base.py +80 -94
  114. mloda_plugins/feature_group/experimental/dimensionality_reduction/pandas.py +2 -2
  115. mloda_plugins/feature_group/experimental/dynamic_feature_group_factory/dynamic_feature_group_factory.py +24 -24
  116. mloda_plugins/feature_group/experimental/forecasting/base.py +106 -104
  117. mloda_plugins/feature_group/experimental/forecasting/forecasting_artifact.py +2 -2
  118. mloda_plugins/feature_group/experimental/forecasting/pandas.py +15 -15
  119. mloda_plugins/feature_group/experimental/geo_distance/base.py +50 -42
  120. mloda_plugins/feature_group/experimental/geo_distance/pandas.py +2 -2
  121. mloda_plugins/feature_group/experimental/llm/cli.py +4 -4
  122. mloda_plugins/feature_group/experimental/llm/cli_features/refactor_git_cached.py +19 -19
  123. mloda_plugins/feature_group/experimental/llm/installed_packages_feature_group.py +8 -8
  124. mloda_plugins/feature_group/experimental/llm/list_directory_feature_group.py +5 -5
  125. mloda_plugins/feature_group/experimental/llm/llm_api/claude.py +3 -3
  126. mloda_plugins/feature_group/experimental/llm/llm_api/gemini.py +3 -3
  127. mloda_plugins/feature_group/experimental/llm/llm_api/llm_base_request.py +5 -5
  128. mloda_plugins/feature_group/experimental/llm/llm_api/openai.py +3 -3
  129. mloda_plugins/feature_group/experimental/llm/llm_api/request_loop.py +6 -6
  130. mloda_plugins/feature_group/experimental/llm/llm_file_selector.py +10 -10
  131. mloda_plugins/feature_group/experimental/llm/tools/tool_collection.py +1 -1
  132. mloda_plugins/feature_group/experimental/node_centrality/base.py +46 -72
  133. mloda_plugins/feature_group/experimental/node_centrality/pandas.py +2 -2
  134. mloda_plugins/feature_group/experimental/sklearn/encoding/base.py +51 -51
  135. mloda_plugins/feature_group/experimental/sklearn/encoding/pandas.py +2 -2
  136. mloda_plugins/feature_group/experimental/sklearn/pipeline/base.py +52 -39
  137. mloda_plugins/feature_group/experimental/sklearn/pipeline/pandas.py +2 -2
  138. mloda_plugins/feature_group/experimental/sklearn/scaling/base.py +44 -58
  139. mloda_plugins/feature_group/experimental/sklearn/scaling/pandas.py +2 -2
  140. mloda_plugins/feature_group/experimental/sklearn/sklearn_artifact.py +2 -2
  141. mloda_plugins/feature_group/experimental/source_input_feature.py +15 -15
  142. mloda_plugins/feature_group/experimental/text_cleaning/base.py +38 -61
  143. mloda_plugins/feature_group/experimental/text_cleaning/pandas.py +2 -2
  144. mloda_plugins/feature_group/experimental/text_cleaning/python_dict.py +2 -2
  145. mloda_plugins/feature_group/experimental/time_window/base.py +106 -93
  146. mloda_plugins/feature_group/experimental/time_window/pandas.py +13 -13
  147. mloda_plugins/feature_group/experimental/time_window/pyarrow.py +12 -12
  148. mloda_plugins/feature_group/input_data/api_data/api_data.py +9 -11
  149. mloda_plugins/feature_group/input_data/read_context_files.py +7 -7
  150. mloda_plugins/feature_group/input_data/read_db.py +7 -9
  151. mloda_plugins/feature_group/input_data/read_db_feature.py +4 -4
  152. mloda_plugins/feature_group/input_data/read_dbs/sqlite.py +23 -13
  153. mloda_plugins/feature_group/input_data/read_file.py +8 -8
  154. mloda_plugins/feature_group/input_data/read_file_feature.py +4 -4
  155. mloda_plugins/feature_group/input_data/read_files/csv.py +6 -6
  156. mloda_plugins/feature_group/input_data/read_files/feather.py +5 -5
  157. mloda_plugins/feature_group/input_data/read_files/json.py +5 -5
  158. mloda_plugins/feature_group/input_data/read_files/orc.py +5 -5
  159. mloda_plugins/feature_group/input_data/read_files/parquet.py +5 -5
  160. mloda_plugins/feature_group/input_data/read_files/text_file_reader.py +5 -5
  161. mloda_plugins/function_extender/base_implementations/otel/otel_extender.py +4 -4
  162. mloda-0.3.2.dist-info/RECORD +0 -230
  163. mloda_core/abstract_plugins/components/link.py +0 -286
  164. mloda_core/abstract_plugins/function_extender.py +0 -34
  165. mloda_core/runtime/run.py +0 -617
  166. {mloda_core → mloda/core}/__init__.py +0 -0
  167. {mloda_core → mloda/core}/abstract_plugins/__init__.py +0 -0
  168. {mloda_core → mloda/core}/abstract_plugins/components/__init__.py +0 -0
  169. {mloda_core → mloda/core}/abstract_plugins/components/domain.py +0 -0
  170. {mloda_core → mloda/core}/abstract_plugins/components/feature_chainer/__init__.py +0 -0
  171. {mloda_core → mloda/core}/abstract_plugins/components/feature_name.py +0 -0
  172. {mloda_core → mloda/core}/abstract_plugins/components/framework_transformer/__init__.py +0 -0
  173. {mloda_core → mloda/core}/abstract_plugins/components/framework_transformer/base_transformer.py +0 -0
  174. {mloda_core → mloda/core}/abstract_plugins/components/hashable_dict.py +0 -0
  175. {mloda_core → mloda/core}/abstract_plugins/components/index/__init__.py +0 -0
  176. {mloda_core → mloda/core}/abstract_plugins/components/index/index.py +0 -0
  177. {mloda_core → mloda/core}/abstract_plugins/components/input_data/__init__.py +0 -0
  178. {mloda_core → mloda/core}/abstract_plugins/components/input_data/api/__init__.py +0 -0
  179. {mloda_core → mloda/core}/abstract_plugins/components/input_data/creator/__init__.py +0 -0
  180. {mloda_core → mloda/core}/abstract_plugins/components/match_data/__init__.py +0 -0
  181. {mloda_core → mloda/core}/abstract_plugins/components/merge/__init__.py +0 -0
  182. {mloda_core → mloda/core}/abstract_plugins/components/plugin_option/__init__.py +0 -0
  183. {mloda_core → mloda/core}/abstract_plugins/components/utils.py +0 -0
  184. {mloda_core/abstract_plugins/plugin_loader → mloda/core/abstract_plugins/components/validators}/__init__.py +0 -0
  185. {mloda_core/api → mloda/core/abstract_plugins/plugin_loader}/__init__.py +0 -0
  186. {mloda_core → mloda/core}/abstract_plugins/plugin_loader/plugin_loader.py +0 -0
  187. {mloda_core/api/prepare → mloda/core/api}/__init__.py +0 -0
  188. {mloda_core/core → mloda/core/api/prepare}/__init__.py +0 -0
  189. {mloda_core/core/step → mloda/core/core}/__init__.py +0 -0
  190. {mloda_core/filter → mloda/core/core/step}/__init__.py +0 -0
  191. {mloda_core/prepare → mloda/core/filter}/__init__.py +0 -0
  192. {mloda_core → mloda/core}/filter/filter_parameter.py +0 -0
  193. {mloda_core/prepare/graph → mloda/core/prepare}/__init__.py +0 -0
  194. {mloda_core/runtime → mloda/core/prepare/graph}/__init__.py +0 -0
  195. {mloda_core/runtime/flight → mloda/core/prepare/validators}/__init__.py +0 -0
  196. {mloda_core/runtime/worker → mloda/core/runtime}/__init__.py +0 -0
  197. {mloda_core → mloda/core}/runtime/flight/flight_server.py +0 -0
  198. {mloda-0.3.2.dist-info → mloda-0.4.0.dist-info}/WHEEL +0 -0
  199. {mloda-0.3.2.dist-info → mloda-0.4.0.dist-info}/entry_points.txt +0 -0
  200. {mloda-0.3.2.dist-info → mloda-0.4.0.dist-info}/licenses/LICENSE.TXT +0 -0
  201. {mloda-0.3.2.dist-info → mloda-0.4.0.dist-info}/licenses/NOTICE.md +0 -0
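
Most of this release is a package restructure: the `mloda_core` tree moves to `mloda/core`, and the public classes are re-exported from the new top-level `mloda`, `mloda.provider`, `mloda.steward`, and `mloda.user` packages (see the new `__init__.py` entries above). A rough before/after sketch of what this means for plugin code, assembled from the import rewrites in the per-file hunks below — the authoritative re-export lists live in those `__init__` modules, so treat this grouping as illustrative, not exhaustive:

```python
# 0.3.2-style imports: deep mloda_core paths, as removed in the hunks below.
from mloda_core.abstract_plugins.abstract_feature_group import AbstractFeatureGroup
from mloda_core.abstract_plugins.compute_frame_work import ComputeFrameWork
from mloda_core.abstract_plugins.components.options import Options

# 0.4.0-style imports, as added in the hunks below: AbstractFeatureGroup is now
# FeatureGroup and ComputeFrameWork is now ComputeFramework; the individual
# one-symbol imports from the diffs are combined onto shared lines here.
from mloda import Feature, FeatureGroup, ComputeFramework, Options
from mloda.provider import FeatureSet, FeatureChainParser, BaseInputData
from mloda.user import FeatureName, DataAccessCollection, DataType
```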
mloda_plugins/feature_group/experimental/time_window/base.py

@@ -7,16 +7,19 @@ from __future__ import annotations
 from abc import abstractmethod
 from typing import Any, List, Optional, Set, Type, Union

-from mloda_core.abstract_plugins.abstract_feature_group import AbstractFeatureGroup
-from mloda_core.abstract_plugins.components.feature import Feature
-from mloda_core.abstract_plugins.components.feature_chainer.feature_chain_parser import FeatureChainParser
-from mloda_core.abstract_plugins.components.feature_name import FeatureName
-from mloda_core.abstract_plugins.components.feature_set import FeatureSet
-from mloda_core.abstract_plugins.components.options import Options
+from mloda import FeatureGroup
+from mloda import Feature
+from mloda.provider import FeatureChainParser
+from mloda.provider import (
+    FeatureChainParserMixin,
+)
+from mloda.user import FeatureName
+from mloda.provider import FeatureSet
+from mloda import Options
 from mloda_plugins.feature_group.experimental.default_options_key import DefaultOptionKeys


-class TimeWindowFeatureGroup(AbstractFeatureGroup):
+class TimeWindowFeatureGroup(FeatureChainParserMixin, FeatureGroup):
     # Option keys for time window configuration
     WINDOW_FUNCTION = "window_function"
     WINDOW_SIZE = "window_size"
@@ -66,21 +69,21 @@ class TimeWindowFeatureGroup(AbstractFeatureGroup):

     ## Requirements
     - The input data must have a datetime column that can be used for time-based operations
-    - By default, the feature group will use DefaultOptionKeys.reference_time (default: "time_filter")
+    - By default, the feature group will use DefaultOptionKeys.reference_time (default: "reference_time")
     - You can specify a custom time column by setting the reference_time option in the feature group options

     """

     @classmethod
-    def get_time_filter_feature(cls, options: Optional[Options] = None) -> str:
+    def get_reference_time_column(cls, options: Optional[Options] = None) -> str:
         """
-        Get the time filter feature name from options or use the default.
+        Get the reference time column name from options or use the default.

         Args:
-            options: Optional Options object that may contain a custom time filter feature name
+            options: Optional Options object that may contain a custom reference time column name

         Returns:
-            The time filter feature name to use
+            The reference time column name to use
         """
         reference_time_key = DefaultOptionKeys.reference_time.value
         if options and options.get(reference_time_key):
@@ -90,7 +93,7 @@ class TimeWindowFeatureGroup(AbstractFeatureGroup):
                     f"Invalid reference_time option: {reference_time}. Must be string. Is: {type(reference_time)}."
                 )
             return reference_time
-        return reference_time_key
+        return DefaultOptionKeys.reference_time.value

     # Define supported window functions
     WINDOW_FUNCTIONS = {
@@ -123,28 +126,28 @@ class TimeWindowFeatureGroup(AbstractFeatureGroup):
         # Window function parameter (context parameter)
         WINDOW_FUNCTION: {
             **WINDOW_FUNCTIONS, # Reference existing WINDOW_FUNCTIONS dict
-            DefaultOptionKeys.mloda_context: True, # Mark as context parameter
-            DefaultOptionKeys.mloda_strict_validation: True, # Enable strict validation
+            DefaultOptionKeys.context: True, # Mark as context parameter
+            DefaultOptionKeys.strict_validation: True, # Enable strict validation
         },
         # Window size parameter (context parameter)
         WINDOW_SIZE: {
             "explanation": "Size of the time window (must be positive integer)",
-            DefaultOptionKeys.mloda_context: True, # Mark as context parameter
-            DefaultOptionKeys.mloda_strict_validation: True, # Enable strict validation
-            DefaultOptionKeys.mloda_validation_function: lambda x: (isinstance(x, int) and x > 0)
+            DefaultOptionKeys.context: True, # Mark as context parameter
+            DefaultOptionKeys.strict_validation: True, # Enable strict validation
+            DefaultOptionKeys.validation_function: lambda x: (isinstance(x, int) and x > 0)
             or (isinstance(x, str) and x.isdigit() and int(x) > 0),
         },
         # Time unit parameter (context parameter)
         TIME_UNIT: {
             **TIME_UNITS, # Reference existing TIME_UNITS dict
-            DefaultOptionKeys.mloda_context: True, # Mark as context parameter
-            DefaultOptionKeys.mloda_strict_validation: True, # Enable strict validation
+            DefaultOptionKeys.context: True, # Mark as context parameter
+            DefaultOptionKeys.strict_validation: True, # Enable strict validation
         },
         # Source feature parameter (context parameter)
         DefaultOptionKeys.in_features: {
             "explanation": "Source feature to apply time window operation to",
-            DefaultOptionKeys.mloda_context: True, # Mark as context parameter
-            DefaultOptionKeys.mloda_strict_validation: False, # Flexible validation
+            DefaultOptionKeys.context: True, # Mark as context parameter
+            DefaultOptionKeys.strict_validation: False, # Flexible validation
         },
     }

@@ -152,6 +155,11 @@ class TimeWindowFeatureGroup(AbstractFeatureGroup):
     PATTERN = "__"
     PREFIX_PATTERN = r".*__([\w]+)_(\d+)_([\w]+)_window$"

+    # In-feature configuration for FeatureChainParserMixin
+    MIN_IN_FEATURES = 1
+    MAX_IN_FEATURES = 1
+
+    # Custom input_features needed to add time_filter_feature
     def input_features(self, options: Options, feature_name: FeatureName) -> Optional[Set[Feature]]:
         """Extract source feature from either configuration-based options or string parsing."""

@@ -160,7 +168,7 @@ class TimeWindowFeatureGroup(AbstractFeatureGroup):
         # Try string-based parsing first
         _, source_feature = FeatureChainParser.parse_feature_name(feature_name.name, [self.PREFIX_PATTERN])
         if source_feature is not None:
-            time_filter_feature = Feature(self.get_time_filter_feature(options))
+            time_filter_feature = Feature(self.get_reference_time_column(options))
             return {Feature(source_feature), time_filter_feature}

         # Fall back to configuration-based approach
@@ -170,9 +178,66 @@ class TimeWindowFeatureGroup(AbstractFeatureGroup):
                 f"Expected exactly one source feature, but found {len(source_features)}: {source_features}"
             )

-        time_filter_feature = Feature(self.get_time_filter_feature(options))
+        time_filter_feature = Feature(self.get_reference_time_column(options))
         return set(source_features) | {time_filter_feature}

+    @classmethod
+    def _extract_time_window_params(cls, feature: Feature) -> tuple[Optional[str], Optional[int], Optional[str]]:
+        """
+        Extract time window parameters (window_function, window_size, time_unit) from a feature.
+
+        Tries string-based parsing first using parse_time_window_prefix, falls back to configuration.
+
+        Args:
+            feature: The feature to extract parameters from
+
+        Returns:
+            Tuple of (window_function, window_size, time_unit), where any value may be None if not found
+        """
+        feature_name = feature.get_name()
+
+        # Try string-based parsing first
+        try:
+            window_function, window_size, time_unit = cls.parse_time_window_prefix(feature_name)
+            return window_function, window_size, time_unit
+        except ValueError:
+            pass
+
+        # Fall back to configuration
+        window_function = feature.options.get(cls.WINDOW_FUNCTION)
+        window_size = feature.options.get(cls.WINDOW_SIZE)
+        time_unit = feature.options.get(cls.TIME_UNIT)
+
+        # Convert window_size to int if it's a string
+        if window_size is not None and isinstance(window_size, str):
+            window_size = int(window_size)
+
+        return window_function, window_size, time_unit
+
+    @classmethod
+    def _extract_time_window_params_and_source_features(cls, feature: Feature) -> tuple[str, int, str, str]:
+        """
+        Extract time window parameters and source feature from a feature.
+
+        Tries string-based parsing first, falls back to configuration-based approach.
+
+        Args:
+            feature: The feature to extract parameters from
+
+        Returns:
+            Tuple of (window_function, window_size, time_unit, source_feature_name)
+
+        Raises:
+            ValueError: If parameters cannot be extracted
+        """
+        source_features = cls._extract_source_features(feature)
+        window_function, window_size, time_unit = cls._extract_time_window_params(feature)
+
+        if window_function is None or window_size is None or time_unit is None:
+            raise ValueError(f"Could not extract time window parameters from: {feature.name}")
+
+        return window_function, window_size, time_unit, source_features[0]
+
     @classmethod
     def parse_time_window_prefix(cls, feature_name: str) -> tuple[str, int, str]:
         """
@@ -243,24 +308,7 @@ class TimeWindowFeatureGroup(AbstractFeatureGroup):
         """Extract the time unit from the feature name."""
         return cls.parse_time_window_prefix(feature_name)[2]

-    @classmethod
-    def match_feature_group_criteria(
-        cls,
-        feature_name: Union[FeatureName, str],
-        options: Options,
-        data_access_collection: Optional[Any] = None,
-    ) -> bool:
-        """Check if feature name matches the expected pattern for time window features."""
-        if isinstance(feature_name, FeatureName):
-            feature_name = feature_name.name
-
-        # Use unified parser approach with PROPERTY_MAPPING
-        return FeatureChainParser.match_configuration_feature_chain_parser(
-            feature_name,
-            options,
-            property_mapping=cls.PROPERTY_MAPPING,
-            prefix_patterns=[cls.PREFIX_PATTERN],
-        )
+    # match_feature_group_criteria() inherited from FeatureChainParserMixin

     @classmethod
     def calculate_feature(cls, data: Any, features: FeatureSet) -> Any:
@@ -283,52 +331,17 @@ class TimeWindowFeatureGroup(AbstractFeatureGroup):
                 raise ValueError("All features must have the same options.")
             _options = feature.options

-        time_filter_feature = cls.get_time_filter_feature(_options)
+        reference_time_column = cls.get_reference_time_column(_options)

-        cls._check_time_filter_feature_exists(data, time_filter_feature)
+        cls._check_reference_time_column_exists(data, reference_time_column)

-        cls._check_time_filter_feature_is_datetime(data, time_filter_feature)
+        cls._check_reference_time_column_is_datetime(data, reference_time_column)

         # Process each requested feature
         for feature in features.features:
-            feature_name = feature.get_name()
-
-            # Try string-based parsing first (for legacy features)
-            parsed_params, in_features = FeatureChainParser.parse_feature_name(feature_name, [cls.PREFIX_PATTERN])
-
-            if in_features is not None:
-                # String-based approach succeeded
-                window_function, window_size, time_unit = cls.parse_time_window_prefix(feature_name)
-            else:
-                # Fall back to configuration-based approach
-                has_config_params = (
-                    feature.options.get(cls.WINDOW_FUNCTION) is not None
-                    and feature.options.get(cls.WINDOW_SIZE) is not None
-                    and feature.options.get(cls.TIME_UNIT) is not None
-                )
-
-                if not has_config_params:
-                    raise ValueError(
-                        f"Feature '{feature_name}' does not match string pattern and lacks configuration parameters"
-                    )
-
-                # Configuration-based approach
-                source_features = feature.options.get_in_features()
-                if len(source_features) != 1:
-                    raise ValueError(
-                        f"Expected exactly one source feature, but found {len(source_features)}: {source_features}"
-                    )
-                source_feature = next(iter(source_features))
-                in_features = source_feature.get_name()
-
-                # Extract parameters from options
-                window_function = feature.options.get(cls.WINDOW_FUNCTION)
-                window_size = feature.options.get(cls.WINDOW_SIZE)
-                time_unit = feature.options.get(cls.TIME_UNIT)
-
-                # Convert window_size to int if it's a string
-                if isinstance(window_size, str):
-                    window_size = int(window_size)
+            window_function, window_size, time_unit, in_features = cls._extract_time_window_params_and_source_features(
+                feature
+            )

             # Resolve multi-column features automatically
             # If in_features is "onehot_encoded__product", this discovers
@@ -340,7 +353,7 @@ class TimeWindowFeatureGroup(AbstractFeatureGroup):
             cls._check_source_features_exist(data, resolved_columns)

             result = cls._perform_window_operation(
-                data, window_function, window_size, time_unit, resolved_columns, time_filter_feature
+                data, window_function, window_size, time_unit, resolved_columns, reference_time_column
             )

             data = cls._add_result_to_data(data, feature.get_name(), result)
@@ -349,31 +362,31 @@ class TimeWindowFeatureGroup(AbstractFeatureGroup):

     @classmethod
     @abstractmethod
-    def _check_time_filter_feature_exists(cls, data: Any, time_filter_feature: str) -> None:
+    def _check_reference_time_column_exists(cls, data: Any, reference_time_column: str) -> None:
         """
-        Check if the time filter feature exists in the data.
+        Check if the reference time column exists in the data.

         Args:
             data: The input data
-            time_filter_feature: The name of the time filter feature
+            reference_time_column: The name of the reference time column

         Raises:
-            ValueError: If the time filter feature does not exist in the data
+            ValueError: If the reference time column does not exist in the data
         """
         ...

     @classmethod
     @abstractmethod
-    def _check_time_filter_feature_is_datetime(cls, data: Any, time_filter_feature: str) -> None:
+    def _check_reference_time_column_is_datetime(cls, data: Any, reference_time_column: str) -> None:
         """
-        Check if the time filter feature is a datetime column.
+        Check if the reference time column is a datetime column.

         Args:
             data: The input data
-            time_filter_feature: The name of the time filter feature
+            reference_time_column: The name of the reference time column

         Raises:
-            ValueError: If the time filter feature is not a datetime column
+            ValueError: If the reference time column is not a datetime column
         """
         ...

@@ -447,7 +460,7 @@ class TimeWindowFeatureGroup(AbstractFeatureGroup):
             time_unit: The time unit for the window
             in_features: List of resolved source feature names to perform window operation on
             time_filter_feature: The name of the time filter feature to use for time-based operations.
-                If None, uses the value from get_time_filter_feature().
+                If None, uses the value from get_reference_time_column().

         Returns:
             The result of the window operation
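
For orientation on the string-parsing path kept above: `PREFIX_PATTERN` anchors the window specification at the end of the feature name, so names shaped like `<source>__<window_function>_<window_size>_<time_unit>_window` match, and anything else falls through to the configuration-based options handled by `_extract_time_window_params`. A standalone check of the regex with a purely hypothetical name (whether a given function such as `"avg"` is actually accepted is decided by the `WINDOW_FUNCTIONS` dict, which is not part of this hunk):

```python
import re

# Copied from the hunk above; the feature name below is illustrative only.
PREFIX_PATTERN = r".*__([\w]+)_(\d+)_([\w]+)_window$"

match = re.match(PREFIX_PATTERN, "price__avg_7_day_window")
if match:
    window_function, window_size, time_unit = match.group(1), int(match.group(2)), match.group(3)
    print(window_function, window_size, time_unit)  # avg 7 day
```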
mloda_plugins/feature_group/experimental/time_window/pandas.py

@@ -6,7 +6,7 @@ from __future__ import annotations

 from typing import Any, List, Optional, Set, Type, Union

-from mloda_core.abstract_plugins.compute_frame_work import ComputeFrameWork
+from mloda import ComputeFramework
 from mloda_plugins.compute_framework.base_implementations.pandas.dataframe import PandasDataFrame
 from mloda_plugins.feature_group.experimental.time_window.base import TimeWindowFeatureGroup

@@ -19,25 +19,25 @@ except ImportError:

 class PandasTimeWindowFeatureGroup(TimeWindowFeatureGroup):
     @classmethod
-    def compute_framework_rule(cls) -> Union[bool, Set[Type[ComputeFrameWork]]]:
+    def compute_framework_rule(cls) -> Union[bool, Set[Type[ComputeFramework]]]:
         return {PandasDataFrame}

     @classmethod
-    def _check_time_filter_feature_exists(cls, data: pd.DataFrame, time_filter_feature: str) -> None:
-        """Check if the time filter feature exists in the DataFrame."""
-        if time_filter_feature not in data.columns:
+    def _check_reference_time_column_exists(cls, data: pd.DataFrame, reference_time_column: str) -> None:
+        """Check if the reference time column exists in the DataFrame."""
+        if reference_time_column not in data.columns:
             raise ValueError(
-                f"Time filter feature '{time_filter_feature}' not found in data. "
+                f"Reference time column '{reference_time_column}' not found in data. "
                 f"Please ensure the DataFrame contains this column."
             )

     @classmethod
-    def _check_time_filter_feature_is_datetime(cls, data: pd.DataFrame, time_filter_feature: str) -> None:
-        """Check if the time filter feature is a datetime column."""
-        if not pd.api.types.is_datetime64_any_dtype(data[time_filter_feature]):
+    def _check_reference_time_column_is_datetime(cls, data: pd.DataFrame, reference_time_column: str) -> None:
+        """Check if the reference time column is a datetime column."""
+        if not pd.api.types.is_datetime64_any_dtype(data[reference_time_column]):
             raise ValueError(
-                f"Time filter feature '{time_filter_feature}' must be a datetime column. "
-                f"Current dtype: {data[time_filter_feature].dtype}"
+                f"Reference time column '{reference_time_column}' must be a datetime column. "
+                f"Current dtype: {data[reference_time_column].dtype}"
             )

     @classmethod
@@ -93,14 +93,14 @@ class PandasTimeWindowFeatureGroup(TimeWindowFeatureGroup):
             time_unit: The time unit for the window
             in_features: List of source feature names (may be single or multiple columns)
             time_filter_feature: The name of the time filter feature to use for time-based operations.
-                If None, uses the value from get_time_filter_feature().
+                If None, uses the value from get_reference_time_column().

         Returns:
             The result of the window operation
         """
         # Use the default time filter feature if none is provided
         if time_filter_feature is None:
-            time_filter_feature = cls.get_time_filter_feature()
+            time_filter_feature = cls.get_reference_time_column()

         # Create a copy of the DataFrame with the time filter feature as the index
         # This is necessary for time-based rolling operations
mloda_plugins/feature_group/experimental/time_window/pyarrow.py

@@ -10,7 +10,7 @@ import datetime
 import pyarrow as pa
 import pyarrow.compute as pc

-from mloda_core.abstract_plugins.compute_frame_work import ComputeFrameWork
+from mloda import ComputeFramework

 from mloda_plugins.compute_framework.base_implementations.pyarrow.table import PyArrowTable
 from mloda_plugins.feature_group.experimental.time_window.base import TimeWindowFeatureGroup
@@ -18,25 +18,25 @@ from mloda_plugins.feature_group.experimental.time_window.base import TimeWindowFeatureGroup

 class PyArrowTimeWindowFeatureGroup(TimeWindowFeatureGroup):
     @classmethod
-    def compute_framework_rule(cls) -> Union[bool, Set[Type[ComputeFrameWork]]]:
+    def compute_framework_rule(cls) -> Union[bool, Set[Type[ComputeFramework]]]:
         return {PyArrowTable}

     @classmethod
-    def _check_time_filter_feature_exists(cls, data: pa.Table, time_filter_feature: str) -> None:
-        """Check if the time filter feature exists in the Table."""
-        if time_filter_feature not in data.schema.names:
+    def _check_reference_time_column_exists(cls, data: pa.Table, reference_time_column: str) -> None:
+        """Check if the reference time column exists in the Table."""
+        if reference_time_column not in data.schema.names:
             raise ValueError(
-                f"Time filter feature '{time_filter_feature}' not found in data. "
+                f"Reference time column '{reference_time_column}' not found in data. "
                 f"Please ensure the Table contains this column."
             )

     @classmethod
-    def _check_time_filter_feature_is_datetime(cls, data: pa.Table, time_filter_feature: str) -> None:
-        """Check if the time filter feature is a datetime column."""
-        time_column = data.column(time_filter_feature)
+    def _check_reference_time_column_is_datetime(cls, data: pa.Table, reference_time_column: str) -> None:
+        """Check if the reference time column is a datetime column."""
+        time_column = data.column(reference_time_column)
         if not pa.types.is_timestamp(time_column.type):
             raise ValueError(
-                f"Time filter feature '{time_filter_feature}' must be a timestamp column. "
+                f"Reference time column '{reference_time_column}' must be a timestamp column. "
                 f"Current type: {time_column.type}"
             )

@@ -103,14 +103,14 @@ class PyArrowTimeWindowFeatureGroup(TimeWindowFeatureGroup):
             time_unit: The time unit for the window
             in_features: List of source feature names (may be single or multiple columns)
             time_filter_feature: The name of the time filter feature to use for time-based operations.
-                If None, uses the value from get_time_filter_feature().
+                If None, uses the value from get_reference_time_column().

         Returns:
             The result of the window operation as a PyArrow Array
         """
         # Use the default time filter feature if none is provided
         if time_filter_feature is None:
-            time_filter_feature = cls.get_time_filter_feature()
+            time_filter_feature = cls.get_reference_time_column()

         # Get the time column
         time_column = data.column(time_filter_feature)
mloda_plugins/feature_group/input_data/api_data/api_data.py

@@ -1,11 +1,9 @@
 from typing import Any, Optional
-from mloda_core.abstract_plugins.abstract_feature_group import AbstractFeatureGroup
-from mloda_core.abstract_plugins.components.feature_set import FeatureSet
-from mloda_core.abstract_plugins.components.input_data.api.api_input_data import ApiInputData
-from mloda_core.abstract_plugins.components.input_data.base_input_data import BaseInputData
+from mloda import FeatureGroup
+from mloda.provider import FeatureSet, ApiData as ApiInputData, BaseInputData


-class ApiInputDataFeature(AbstractFeatureGroup):
+class ApiInputDataFeature(FeatureGroup):
     """
     Base class for API-based input data feature groups.

@@ -39,9 +37,9 @@ class ApiInputDataFeature(AbstractFeatureGroup):
     Uses Options to specify API data access configuration:

     ```python
-    from mloda_core.abstract_plugins.components.feature import Feature
-    from mloda_core.abstract_plugins.components.options import Options
-    from mloda_core.abstract_plugins.components.input_data.api.api_input_data import ApiInputData
+    from mloda import Feature
+    from mloda import Options
+    from mloda.core.abstract_plugins.components.input_data.api.api_input_data import ApiInputData

     feature = Feature(
         name="user_profile",
@@ -60,8 +58,8 @@ class ApiInputDataFeature(AbstractFeatureGroup):
     ### Basic API Data Access

     ```python
-    from mloda_core.abstract_plugins.components.feature import Feature
-    from mloda_core.abstract_plugins.components.options import Options
+    from mloda import Feature
+    from mloda import Options

     # Simple API feature reference
     feature = Feature(name="api_user_score")
@@ -70,7 +68,7 @@ class ApiInputDataFeature(AbstractFeatureGroup):
     ### Configuration-Based with Endpoint Mapping

     ```python
-    from mloda_core.abstract_plugins.components.input_data.api.api_input_data import ApiInputData
+    from mloda.core.abstract_plugins.components.input_data.api.api_input_data import ApiInputData

     # Map multiple API response fields
     feature = Feature(
mloda_plugins/feature_group/input_data/read_context_files.py

@@ -3,12 +3,12 @@ from pathlib import Path
 from typing import Any, List, Set


-from mloda_core.abstract_plugins.abstract_feature_group import AbstractFeatureGroup
-from mloda_core.abstract_plugins.components.feature import Feature
-from mloda_core.abstract_plugins.components.feature_name import FeatureName
-from mloda_core.abstract_plugins.components.feature_set import FeatureSet
-from mloda_core.abstract_plugins.components.link import JoinType
-from mloda_core.abstract_plugins.components.options import Options
+from mloda import FeatureGroup
+from mloda import Feature
+from mloda.user import FeatureName
+from mloda.provider import FeatureSet
+from mloda.user import JoinType
+from mloda import Options
 from mloda_plugins.feature_group.experimental.default_options_key import DefaultOptionKeys
 from mloda_plugins.feature_group.experimental.dynamic_feature_group_factory.dynamic_feature_group_factory import (
     DynamicFeatureGroupCreator,
@@ -27,7 +27,7 @@ except ImportError:
     pd = None


-class ConcatenatedFileContent(AbstractFeatureGroup):
+class ConcatenatedFileContent(FeatureGroup):
     """
     A feature group that reads and combines content from files within a directory (default: python files).

mloda_plugins/feature_group/input_data/read_db.py

@@ -1,9 +1,7 @@
 from typing import Any, Dict, List, Optional, Tuple, Union
-from mloda_core.abstract_plugins.components.data_access_collection import DataAccessCollection
-from mloda_core.abstract_plugins.components.feature_set import FeatureSet
-from mloda_core.abstract_plugins.components.hashable_dict import HashableDict
-from mloda_core.abstract_plugins.components.input_data.base_input_data import BaseInputData
-from mloda_core.abstract_plugins.components.options import Options
+from mloda.user import DataAccessCollection
+from mloda.provider import FeatureSet, HashableDict, BaseInputData
+from mloda import Options


 class ReadDB(BaseInputData):
@@ -85,11 +83,11 @@ class ReadDB(BaseInputData):
             data_accesses.append(data_access)

         if not data_accesses:
-            return False
+            return None

         matched_data_access = cls.match_read_db_data_access(data_accesses, feature_names)
-        if matched_data_access is False:
-            return False
+        if matched_data_access is None:
+            return None
         return matched_data_access

     @classmethod
@@ -110,7 +108,7 @@ class ReadDB(BaseInputData):
                 return data_access
             except NotImplementedError:
                 continue
-        return False
+        return None

     @classmethod
     def get_connection(cls, credentials: Any) -> Any:
mloda_plugins/feature_group/input_data/read_db_feature.py

@@ -1,12 +1,12 @@
 from typing import Any, Optional

-from mloda_core.abstract_plugins.abstract_feature_group import AbstractFeatureGroup
-from mloda_core.abstract_plugins.components.feature_set import FeatureSet
-from mloda_core.abstract_plugins.components.input_data.base_input_data import BaseInputData
+from mloda import FeatureGroup
+from mloda.provider import FeatureSet
+from mloda.provider import BaseInputData
 from mloda_plugins.feature_group.input_data.read_db import ReadDB


-class ReadDBFeature(AbstractFeatureGroup):
+class ReadDBFeature(FeatureGroup):
     @classmethod
     def input_data(cls) -> Optional[BaseInputData]:
         return ReadDB()
mloda_plugins/feature_group/input_data/read_dbs/sqlite.py

@@ -4,10 +4,9 @@ from typing import Any
 import pyarrow as pa
 import sqlite3

-from mloda_core.abstract_plugins.components.feature_set import FeatureSet
-from mloda_core.abstract_plugins.components.hashable_dict import HashableDict
-from mloda_core.abstract_plugins.components.data_types import DataType
-from mloda_core.abstract_plugins.components.options import Options
+from mloda.provider import FeatureSet, HashableDict
+from mloda.user import DataType
+from mloda import Options
 from mloda_plugins.feature_group.input_data.read_db import ReadDB


@@ -46,9 +45,9 @@ class SQLITEReader(ReadDB):
     Uses Options with database credentials and configuration:

     ```python
-    from mloda_core.abstract_plugins.components.feature import Feature
-    from mloda_core.abstract_plugins.components.options import Options
-    from mloda_core.abstract_plugins.components.hashable_dict import HashableDict
+    from mloda import Feature
+    from mloda import Options
+    from mloda.core.abstract_plugins.components.hashable_dict import HashableDict

     feature = Feature(
         name="customer_name",
@@ -68,8 +67,8 @@ class SQLITEReader(ReadDB):
     ### Basic SQLite Feature Access

     ```python
-    from mloda_core.abstract_plugins.components.feature import Feature
-    from mloda_core.abstract_plugins.components.hashable_dict import HashableDict
+    from mloda import Feature
+    from mloda.core.abstract_plugins.components.hashable_dict import HashableDict

     # Simple column reference from SQLite database
     feature = Feature(
@@ -117,7 +116,7 @@ class SQLITEReader(ReadDB):
     ### Using DataAccessCollection

     ```python
-    from mloda_core.abstract_plugins.components.data_access_collection import DataAccessCollection
+    from mloda.user import DataAccessCollection

     # Configure database access at the collection level
     data_access = DataAccessCollection(
@@ -214,11 +213,22 @@ class SQLITEReader(ReadDB):
     def load_data(cls, data_access: Any, features: FeatureSet) -> Any:
         query = cls.build_query(features)
         result, column_names = cls.read_db(data_access, query)
-        return cls.read_as_pa_data(result, column_names)
+        return cls.read_as_pa_data(result, column_names, features)

     @classmethod
-    def read_as_pa_data(cls, result: Any, column_names: Any) -> Any:
-        schema = pa.schema([(column_names[i], DataType.infer_arrow_type(result[0][i])) for i in range(len(result[0]))])
+    def read_as_pa_data(cls, result: Any, column_names: Any, features: Any) -> Any:
+        feature_map = {f.get_name(): f for f in features.features}
+
+        schema_fields = []
+        for i, col_name in enumerate(column_names):
+            feature = feature_map.get(col_name)
+            if feature and feature.data_type:
+                arrow_type = DataType.to_arrow_type(feature.data_type)
+            else:
+                arrow_type = DataType.infer_arrow_type(result[0][i])
+            schema_fields.append((col_name, arrow_type))
+
+        schema = pa.schema(schema_fields)
         data_dicts = [{column_names[i]: row[i] for i in range(len(row))} for row in result]
         table = pa.Table.from_pylist(data_dicts, schema=schema)
         return table
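
The `read_as_pa_data` change above makes SQLITEReader prefer a feature's declared `data_type` over first-row inference when building the Arrow schema. A minimal standalone illustration in plain PyArrow of why that ordering can help, with a hypothetical declared-type mapping standing in for the `Feature` objects:

```python
import pyarrow as pa

# Two rows from a hypothetical query result; the first row has a NULL in "age".
rows = [(None, "Alice"), (42, "Bob")]
column_names = ["age", "name"]

# Stand-in for "the requested Feature declared a data_type for this column".
declared = {"age": pa.int64()}

fields = []
for i, name in enumerate(column_names):
    if name in declared:
        arrow_type = declared[name]  # declared type wins
    else:
        # First-row inference; a NULL here would yield a null-typed column.
        arrow_type = pa.scalar(rows[0][i]).type
    fields.append((name, arrow_type))

schema = pa.schema(fields)
table = pa.Table.from_pylist([dict(zip(column_names, r)) for r in rows], schema=schema)
print(table.schema)  # age: int64, name: string
```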