mloda 0.3.0__py3-none-any.whl → 0.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mloda-0.3.0.dist-info → mloda-0.3.2.dist-info}/METADATA +10 -10
- {mloda-0.3.0.dist-info → mloda-0.3.2.dist-info}/RECORD +92 -91
- mloda_core/abstract_plugins/components/base_artifact.py +3 -1
- mloda_core/abstract_plugins/components/feature.py +4 -4
- mloda_core/abstract_plugins/components/feature_chainer/feature_chain_parser.py +44 -17
- mloda_core/abstract_plugins/components/feature_collection.py +2 -2
- mloda_core/abstract_plugins/components/feature_group_version.py +4 -4
- mloda_core/abstract_plugins/components/feature_name.py +0 -3
- mloda_core/abstract_plugins/components/input_data/base_input_data.py +3 -3
- mloda_core/abstract_plugins/components/link.py +113 -29
- mloda_core/abstract_plugins/components/options.py +10 -10
- mloda_core/api/prepare/setup_compute_framework.py +2 -2
- mloda_core/api/request.py +44 -13
- mloda_core/core/step/feature_group_step.py +2 -1
- mloda_core/filter/filter_engine.py +3 -12
- mloda_core/filter/filter_parameter.py +55 -0
- mloda_core/filter/single_filter.py +4 -4
- mloda_core/prepare/execution_plan.py +12 -6
- mloda_core/prepare/graph/graph.py +3 -3
- mloda_core/prepare/identify_feature_group.py +10 -3
- mloda_core/prepare/resolve_links.py +86 -18
- mloda_core/runtime/flight/flight_server.py +1 -1
- mloda_core/runtime/run.py +7 -5
- mloda_core/runtime/worker/multiprocessing_worker.py +11 -9
- mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_filter_engine.py +7 -33
- mloda_plugins/compute_framework/base_implementations/duckdb/duckdb_pyarrow_transformer.py +1 -1
- mloda_plugins/compute_framework/base_implementations/iceberg/iceberg_filter_engine.py +22 -12
- mloda_plugins/compute_framework/base_implementations/iceberg/iceberg_framework.py +2 -2
- mloda_plugins/compute_framework/base_implementations/iceberg/iceberg_pyarrow_transformer.py +2 -2
- mloda_plugins/compute_framework/base_implementations/pandas/dataframe.py +2 -2
- mloda_plugins/compute_framework/base_implementations/pandas/pandaspyarrowtransformer.py +1 -1
- mloda_plugins/compute_framework/base_implementations/polars/dataframe.py +3 -3
- mloda_plugins/compute_framework/base_implementations/polars/lazy_dataframe.py +5 -5
- mloda_plugins/compute_framework/base_implementations/polars/polars_filter_engine.py +8 -34
- mloda_plugins/compute_framework/base_implementations/polars/polars_lazy_merge_engine.py +1 -1
- mloda_plugins/compute_framework/base_implementations/polars/polars_lazy_pyarrow_transformer.py +3 -3
- mloda_plugins/compute_framework/base_implementations/polars/polars_merge_engine.py +1 -1
- mloda_plugins/compute_framework/base_implementations/polars/polars_pyarrow_transformer.py +2 -2
- mloda_plugins/compute_framework/base_implementations/pyarrow/pyarrow_filter_engine.py +7 -33
- mloda_plugins/compute_framework/base_implementations/pyarrow/table.py +1 -1
- mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_filter_engine.py +13 -32
- mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_framework.py +1 -1
- mloda_plugins/compute_framework/base_implementations/python_dict/python_dict_pyarrow_transformer.py +1 -1
- mloda_plugins/compute_framework/base_implementations/spark/spark_filter_engine.py +13 -32
- mloda_plugins/compute_framework/base_implementations/spark/spark_framework.py +4 -4
- mloda_plugins/compute_framework/base_implementations/spark/spark_pyarrow_transformer.py +1 -1
- mloda_plugins/config/feature/loader.py +12 -18
- mloda_plugins/feature_group/experimental/aggregated_feature_group/base.py +20 -17
- mloda_plugins/feature_group/experimental/aggregated_feature_group/pandas.py +8 -8
- mloda_plugins/feature_group/experimental/aggregated_feature_group/polars_lazy.py +8 -8
- mloda_plugins/feature_group/experimental/aggregated_feature_group/pyarrow.py +7 -7
- mloda_plugins/feature_group/experimental/clustering/base.py +26 -26
- mloda_plugins/feature_group/experimental/clustering/pandas.py +31 -29
- mloda_plugins/feature_group/experimental/data_quality/missing_value/base.py +23 -22
- mloda_plugins/feature_group/experimental/data_quality/missing_value/pandas.py +16 -16
- mloda_plugins/feature_group/experimental/data_quality/missing_value/pyarrow.py +9 -11
- mloda_plugins/feature_group/experimental/data_quality/missing_value/python_dict.py +8 -8
- mloda_plugins/feature_group/experimental/default_options_key.py +1 -1
- mloda_plugins/feature_group/experimental/dimensionality_reduction/base.py +17 -15
- mloda_plugins/feature_group/experimental/dimensionality_reduction/pandas.py +30 -18
- mloda_plugins/feature_group/experimental/dynamic_feature_group_factory/dynamic_feature_group_factory.py +35 -35
- mloda_plugins/feature_group/experimental/forecasting/base.py +39 -29
- mloda_plugins/feature_group/experimental/forecasting/pandas.py +18 -18
- mloda_plugins/feature_group/experimental/geo_distance/base.py +18 -20
- mloda_plugins/feature_group/experimental/geo_distance/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/llm/cli_features/refactor_git_cached.py +6 -6
- mloda_plugins/feature_group/experimental/llm/installed_packages_feature_group.py +2 -2
- mloda_plugins/feature_group/experimental/llm/list_directory_feature_group.py +2 -2
- mloda_plugins/feature_group/experimental/llm/llm_api/llm_base_request.py +2 -2
- mloda_plugins/feature_group/experimental/llm/llm_api/request_loop.py +3 -2
- mloda_plugins/feature_group/experimental/llm/llm_file_selector.py +1 -1
- mloda_plugins/feature_group/experimental/node_centrality/base.py +8 -12
- mloda_plugins/feature_group/experimental/node_centrality/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/sklearn/encoding/base.py +11 -12
- mloda_plugins/feature_group/experimental/sklearn/encoding/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/sklearn/pipeline/base.py +9 -14
- mloda_plugins/feature_group/experimental/sklearn/pipeline/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/sklearn/scaling/base.py +8 -9
- mloda_plugins/feature_group/experimental/sklearn/scaling/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/source_input_feature.py +10 -10
- mloda_plugins/feature_group/experimental/text_cleaning/base.py +8 -11
- mloda_plugins/feature_group/experimental/text_cleaning/pandas.py +2 -2
- mloda_plugins/feature_group/experimental/time_window/base.py +27 -25
- mloda_plugins/feature_group/experimental/time_window/pandas.py +8 -8
- mloda_plugins/feature_group/experimental/time_window/pyarrow.py +6 -6
- mloda_plugins/feature_group/input_data/read_context_files.py +1 -1
- mloda_plugins/function_extender/base_implementations/otel/otel_extender.py +1 -1
- {mloda-0.3.0.dist-info → mloda-0.3.2.dist-info}/WHEEL +0 -0
- {mloda-0.3.0.dist-info → mloda-0.3.2.dist-info}/entry_points.txt +0 -0
- {mloda-0.3.0.dist-info → mloda-0.3.2.dist-info}/licenses/LICENSE.TXT +0 -0
- {mloda-0.3.0.dist-info → mloda-0.3.2.dist-info}/licenses/NOTICE.md +0 -0
- {mloda-0.3.0.dist-info → mloda-0.3.2.dist-info}/top_level.txt +0 -0
|
@@ -4,15 +4,17 @@ Pandas implementation for dimensionality reduction feature groups.
|
|
|
4
4
|
|
|
5
5
|
from __future__ import annotations
|
|
6
6
|
|
|
7
|
-
from typing import Any, List, cast
|
|
7
|
+
from typing import Any, List, TYPE_CHECKING, cast
|
|
8
8
|
|
|
9
|
+
if TYPE_CHECKING:
|
|
10
|
+
from numpy.typing import NDArray
|
|
9
11
|
|
|
10
12
|
try:
|
|
11
13
|
import pandas as pd
|
|
12
14
|
import numpy as np
|
|
13
15
|
except ImportError:
|
|
14
16
|
pd = None
|
|
15
|
-
np = None # type: ignore
|
|
17
|
+
np = None # type: ignore[assignment]
|
|
16
18
|
|
|
17
19
|
# Check if required packages are available
|
|
18
20
|
SKLEARN_AVAILABLE = True
|
|
@@ -26,7 +28,7 @@ except ImportError:
|
|
|
26
28
|
|
|
27
29
|
|
|
28
30
|
from mloda_core.abstract_plugins.compute_frame_work import ComputeFrameWork
|
|
29
|
-
from mloda_plugins.compute_framework.base_implementations.pandas.dataframe import
|
|
31
|
+
from mloda_plugins.compute_framework.base_implementations.pandas.dataframe import PandasDataFrame
|
|
30
32
|
from mloda_plugins.feature_group.experimental.dimensionality_reduction.base import DimensionalityReductionFeatureGroup
|
|
31
33
|
|
|
32
34
|
|
|
@@ -34,7 +36,7 @@ class PandasDimensionalityReductionFeatureGroup(DimensionalityReductionFeatureGr
|
|
|
34
36
|
@classmethod
|
|
35
37
|
def compute_framework_rule(cls) -> set[type[ComputeFrameWork]]:
|
|
36
38
|
"""Define the compute framework for this feature group."""
|
|
37
|
-
return {
|
|
39
|
+
return {PandasDataFrame}
|
|
38
40
|
|
|
39
41
|
@classmethod
|
|
40
42
|
def _check_source_feature_exists(cls, data: pd.DataFrame, feature_name: str) -> None:
|
|
@@ -52,7 +54,7 @@ class PandasDimensionalityReductionFeatureGroup(DimensionalityReductionFeatureGr
|
|
|
52
54
|
raise ValueError(f"Feature '{feature_name}' not found in the data")
|
|
53
55
|
|
|
54
56
|
@classmethod
|
|
55
|
-
def _add_result_to_data(cls, data: pd.DataFrame, feature_name: str, result:
|
|
57
|
+
def _add_result_to_data(cls, data: "pd.DataFrame", feature_name: str, result: "NDArray[Any]") -> "pd.DataFrame":
|
|
56
58
|
"""
|
|
57
59
|
Add the dimensionality reduction result to the DataFrame using the multiple result columns pattern.
|
|
58
60
|
|
|
@@ -83,7 +85,7 @@ class PandasDimensionalityReductionFeatureGroup(DimensionalityReductionFeatureGr
|
|
|
83
85
|
dimension: int,
|
|
84
86
|
source_features: List[str],
|
|
85
87
|
options: Any,
|
|
86
|
-
) ->
|
|
88
|
+
) -> "NDArray[Any]":
|
|
87
89
|
"""
|
|
88
90
|
Perform dimensionality reduction on the specified features.
|
|
89
91
|
|
|
@@ -184,7 +186,7 @@ class PandasDimensionalityReductionFeatureGroup(DimensionalityReductionFeatureGr
|
|
|
184
186
|
raise ValueError(f"Unsupported dimensionality reduction algorithm: {algorithm}")
|
|
185
187
|
|
|
186
188
|
@classmethod
|
|
187
|
-
def _perform_pca_reduction(cls, X:
|
|
189
|
+
def _perform_pca_reduction(cls, X: "NDArray[Any]", dimension: int, svd_solver: str = "auto") -> "NDArray[Any]":
|
|
188
190
|
"""
|
|
189
191
|
Perform Principal Component Analysis (PCA).
|
|
190
192
|
|
|
@@ -202,17 +204,17 @@ class PandasDimensionalityReductionFeatureGroup(DimensionalityReductionFeatureGr
|
|
|
202
204
|
|
|
203
205
|
# Perform PCA
|
|
204
206
|
pca = PCA(n_components=dimension, random_state=42, svd_solver=svd_solver)
|
|
205
|
-
return pca.fit_transform(X)
|
|
207
|
+
return cast("NDArray[Any]", pca.fit_transform(X))
|
|
206
208
|
|
|
207
209
|
@classmethod
|
|
208
210
|
def _perform_tsne_reduction(
|
|
209
211
|
cls,
|
|
210
|
-
X:
|
|
212
|
+
X: "NDArray[Any]",
|
|
211
213
|
dimension: int,
|
|
212
214
|
max_iter: int = 250,
|
|
213
215
|
n_iter_without_progress: int = 50,
|
|
214
216
|
method: str = "barnes_hut",
|
|
215
|
-
) ->
|
|
217
|
+
) -> "NDArray[Any]":
|
|
216
218
|
"""
|
|
217
219
|
Perform t-Distributed Stochastic Neighbor Embedding (t-SNE).
|
|
218
220
|
|
|
@@ -254,10 +256,10 @@ class PandasDimensionalityReductionFeatureGroup(DimensionalityReductionFeatureGr
|
|
|
254
256
|
n_iter_without_progress=n_iter_without_progress,
|
|
255
257
|
method=actual_method,
|
|
256
258
|
)
|
|
257
|
-
return tsne.fit_transform(X)
|
|
259
|
+
return cast("NDArray[Any]", tsne.fit_transform(X))
|
|
258
260
|
|
|
259
261
|
@classmethod
|
|
260
|
-
def _perform_ica_reduction(cls, X:
|
|
262
|
+
def _perform_ica_reduction(cls, X: "NDArray[Any]", dimension: int, max_iter: int = 200) -> "NDArray[Any]":
|
|
261
263
|
"""
|
|
262
264
|
Perform Independent Component Analysis (ICA).
|
|
263
265
|
|
|
@@ -273,12 +275,22 @@ class PandasDimensionalityReductionFeatureGroup(DimensionalityReductionFeatureGr
|
|
|
273
275
|
if not SKLEARN_AVAILABLE:
|
|
274
276
|
raise ImportError("scikit-learn is required for ICA dimensionality reduction")
|
|
275
277
|
|
|
278
|
+
# For small datasets, increase tolerance and iterations for better convergence
|
|
279
|
+
n_samples = X.shape[0]
|
|
280
|
+
if n_samples < 50:
|
|
281
|
+
# Small datasets may need more iterations and higher tolerance
|
|
282
|
+
actual_max_iter = max(max_iter, 1000)
|
|
283
|
+
tol = 0.01
|
|
284
|
+
else:
|
|
285
|
+
actual_max_iter = max_iter
|
|
286
|
+
tol = 1e-4 # sklearn default
|
|
287
|
+
|
|
276
288
|
# Perform ICA
|
|
277
|
-
ica = FastICA(n_components=dimension, random_state=42, max_iter=
|
|
278
|
-
return ica.fit_transform(X)
|
|
289
|
+
ica = FastICA(n_components=dimension, random_state=42, max_iter=actual_max_iter, tol=tol)
|
|
290
|
+
return cast("NDArray[Any]", ica.fit_transform(X))
|
|
279
291
|
|
|
280
292
|
@classmethod
|
|
281
|
-
def _perform_lda_reduction(cls, X:
|
|
293
|
+
def _perform_lda_reduction(cls, X: "NDArray[Any]", dimension: int, df: "pd.DataFrame") -> "NDArray[Any]":
|
|
282
294
|
"""
|
|
283
295
|
Perform Linear Discriminant Analysis (LDA).
|
|
284
296
|
|
|
@@ -307,10 +319,10 @@ class PandasDimensionalityReductionFeatureGroup(DimensionalityReductionFeatureGr
|
|
|
307
319
|
|
|
308
320
|
# Perform LDA
|
|
309
321
|
lda = LinearDiscriminantAnalysis(n_components=dimension)
|
|
310
|
-
return lda.fit_transform(X, y)
|
|
322
|
+
return cast("NDArray[Any]", lda.fit_transform(X, y))
|
|
311
323
|
|
|
312
324
|
@classmethod
|
|
313
|
-
def _perform_isomap_reduction(cls, X:
|
|
325
|
+
def _perform_isomap_reduction(cls, X: "NDArray[Any]", dimension: int, n_neighbors: int = 5) -> "NDArray[Any]":
|
|
314
326
|
"""
|
|
315
327
|
Perform Isometric Mapping (Isomap).
|
|
316
328
|
|
|
@@ -328,4 +340,4 @@ class PandasDimensionalityReductionFeatureGroup(DimensionalityReductionFeatureGr
|
|
|
328
340
|
|
|
329
341
|
# Perform Isomap
|
|
330
342
|
isomap = Isomap(n_components=dimension, n_neighbors=n_neighbors)
|
|
331
|
-
return isomap.fit_transform(X)
|
|
343
|
+
return cast("NDArray[Any]", isomap.fit_transform(X))
|
|
@@ -141,7 +141,7 @@ class DynamicFeatureGroupCreator:
|
|
|
141
141
|
properties = {
|
|
142
142
|
"match_feature_group_criteria": custom_match_criteria,
|
|
143
143
|
"input_features": custom_input_features,
|
|
144
|
-
"compute_framework_rule": lambda: {
|
|
144
|
+
"compute_framework_rule": lambda: {PandasDataFrame},
|
|
145
145
|
}
|
|
146
146
|
|
|
147
147
|
CustomFG = DynamicFeatureGroupCreator.create(
|
|
@@ -202,70 +202,70 @@ class DynamicFeatureGroupCreator:
|
|
|
202
202
|
if class_name in DynamicFeatureGroupCreator._created_classes:
|
|
203
203
|
return DynamicFeatureGroupCreator._created_classes[class_name]
|
|
204
204
|
|
|
205
|
-
def set_feature_name(self, config: Options, feature_name: FeatureName) -> FeatureName: # type: ignore
|
|
205
|
+
def set_feature_name(self, config: Options, feature_name: FeatureName) -> FeatureName: # type: ignore[no-untyped-def]
|
|
206
206
|
if "set_feature_name" in properties:
|
|
207
|
-
return properties["set_feature_name"](self, config, feature_name) # type: ignore
|
|
207
|
+
return properties["set_feature_name"](self, config, feature_name) # type: ignore[no-any-return]
|
|
208
208
|
return feature_name
|
|
209
209
|
|
|
210
|
-
def match_feature_group_criteria( # type: ignore
|
|
210
|
+
def match_feature_group_criteria( # type: ignore[no-untyped-def]
|
|
211
211
|
cls,
|
|
212
212
|
feature_name: Union[FeatureName, str],
|
|
213
213
|
options: Options,
|
|
214
214
|
data_access_collection: Optional[DataAccessCollection] = None,
|
|
215
215
|
) -> bool:
|
|
216
216
|
if "match_feature_group_criteria" in properties:
|
|
217
|
-
return properties["match_feature_group_criteria"](cls, feature_name, options, data_access_collection) # type: ignore
|
|
218
|
-
return super(new_class, cls).match_feature_group_criteria(feature_name, options, data_access_collection) # type: ignore
|
|
217
|
+
return properties["match_feature_group_criteria"](cls, feature_name, options, data_access_collection) # type: ignore[no-any-return]
|
|
218
|
+
return super(new_class, cls).match_feature_group_criteria(feature_name, options, data_access_collection) # type: ignore[misc, arg-type, no-any-return]
|
|
219
219
|
|
|
220
|
-
def input_data(cls) -> Optional[BaseInputData]: # type: ignore
|
|
220
|
+
def input_data(cls) -> Optional[BaseInputData]: # type: ignore[no-untyped-def]
|
|
221
221
|
if "input_data" in properties:
|
|
222
|
-
return properties["input_data"]() # type: ignore
|
|
223
|
-
return super(new_class, cls).input_data() # type: ignore
|
|
222
|
+
return properties["input_data"]() # type: ignore[no-any-return]
|
|
223
|
+
return super(new_class, cls).input_data() # type: ignore[misc, arg-type, no-any-return]
|
|
224
224
|
|
|
225
|
-
def validate_input_features(cls, data: Any, features: FeatureSet) -> Optional[bool]: # type: ignore
|
|
225
|
+
def validate_input_features(cls, data: Any, features: FeatureSet) -> Optional[bool]: # type: ignore[no-untyped-def]
|
|
226
226
|
if "validate_input_features" in properties:
|
|
227
|
-
return properties["validate_input_features"](cls, data, features) # type: ignore
|
|
228
|
-
return super(new_class, cls).validate_input_features(data, features) # type: ignore
|
|
227
|
+
return properties["validate_input_features"](cls, data, features) # type: ignore[no-any-return]
|
|
228
|
+
return super(new_class, cls).validate_input_features(data, features) # type: ignore[misc, arg-type, no-any-return]
|
|
229
229
|
|
|
230
|
-
def calculate_feature(cls, data: Any, features: FeatureSet) -> Any: # type: ignore
|
|
230
|
+
def calculate_feature(cls, data: Any, features: FeatureSet) -> Any: # type: ignore[no-untyped-def]
|
|
231
231
|
if "calculate_feature" in properties:
|
|
232
232
|
return properties["calculate_feature"](cls, data, features)
|
|
233
|
-
return super(new_class, cls).calculate_feature(data, features) # type: ignore
|
|
233
|
+
return super(new_class, cls).calculate_feature(data, features) # type: ignore[misc, arg-type]
|
|
234
234
|
|
|
235
|
-
def validate_output_features(cls, data: Any, features: FeatureSet) -> Optional[bool]: # type: ignore
|
|
235
|
+
def validate_output_features(cls, data: Any, features: FeatureSet) -> Optional[bool]: # type: ignore[no-untyped-def]
|
|
236
236
|
if "validate_output_features" in properties:
|
|
237
|
-
return properties["validate_output_features"](cls, data, features) # type: ignore
|
|
238
|
-
return super(new_class, cls).validate_output_features(data, features) # type: ignore
|
|
237
|
+
return properties["validate_output_features"](cls, data, features) # type: ignore[no-any-return]
|
|
238
|
+
return super(new_class, cls).validate_output_features(data, features) # type: ignore[misc, arg-type, no-any-return]
|
|
239
239
|
|
|
240
|
-
def artifact(cls) -> Optional[Type[Any]]: # type: ignore
|
|
240
|
+
def artifact(cls) -> Optional[Type[Any]]: # type: ignore[no-untyped-def]
|
|
241
241
|
if "artifact" in properties:
|
|
242
|
-
return properties["artifact"]() # type: ignore
|
|
243
|
-
return super(new_class, cls).artifact() # type: ignore
|
|
242
|
+
return properties["artifact"]() # type: ignore[no-any-return]
|
|
243
|
+
return super(new_class, cls).artifact() # type: ignore[misc, arg-type, no-any-return]
|
|
244
244
|
|
|
245
|
-
def compute_framework_rule(cls) -> Union[bool, Set[Type[ComputeFrameWork]]]: # type: ignore
|
|
245
|
+
def compute_framework_rule(cls) -> Union[bool, Set[Type[ComputeFrameWork]]]: # type: ignore[no-untyped-def]
|
|
246
246
|
if "compute_framework_rule" in properties:
|
|
247
|
-
return properties["compute_framework_rule"]() # type: ignore
|
|
248
|
-
return super(new_class, cls).compute_framework_rule() # type: ignore
|
|
247
|
+
return properties["compute_framework_rule"]() # type: ignore[no-any-return]
|
|
248
|
+
return super(new_class, cls).compute_framework_rule() # type: ignore[misc, arg-type, no-any-return]
|
|
249
249
|
|
|
250
|
-
def return_data_type_rule(cls, feature: Any) -> Optional[DataType]: # type: ignore
|
|
250
|
+
def return_data_type_rule(cls, feature: Any) -> Optional[DataType]: # type: ignore[no-untyped-def]
|
|
251
251
|
if "return_data_type_rule" in properties:
|
|
252
|
-
return properties["return_data_type_rule"](cls, feature) # type: ignore
|
|
253
|
-
return super(new_class, cls).return_data_type_rule(feature) # type: ignore
|
|
252
|
+
return properties["return_data_type_rule"](cls, feature) # type: ignore[no-any-return]
|
|
253
|
+
return super(new_class, cls).return_data_type_rule(feature) # type: ignore[misc, arg-type, no-any-return]
|
|
254
254
|
|
|
255
|
-
def input_features(self, options: Options, feature_name: FeatureName) -> Optional[Set[Any]]: # type: ignore
|
|
255
|
+
def input_features(self, options: Options, feature_name: FeatureName) -> Optional[Set[Any]]: # type: ignore[no-untyped-def]
|
|
256
256
|
if "input_features" in properties:
|
|
257
|
-
return properties["input_features"](self, options, feature_name) # type: ignore
|
|
258
|
-
return super(new_class, self).input_features(options, feature_name) # type: ignore
|
|
257
|
+
return properties["input_features"](self, options, feature_name) # type: ignore[no-any-return]
|
|
258
|
+
return super(new_class, self).input_features(options, feature_name) # type: ignore[misc, arg-type, no-any-return]
|
|
259
259
|
|
|
260
|
-
def index_columns(cls) -> Optional[List[Index]]: # type: ignore
|
|
260
|
+
def index_columns(cls) -> Optional[List[Index]]: # type: ignore[no-untyped-def]
|
|
261
261
|
if "index_columns" in properties:
|
|
262
|
-
return properties["index_columns"]() # type: ignore
|
|
263
|
-
return super(new_class, cls).index_columns() # type: ignore
|
|
262
|
+
return properties["index_columns"]() # type: ignore[no-any-return]
|
|
263
|
+
return super(new_class, cls).index_columns() # type: ignore[misc, arg-type, no-any-return]
|
|
264
264
|
|
|
265
|
-
def supports_index(cls, index: Index) -> Optional[bool]: # type: ignore
|
|
265
|
+
def supports_index(cls, index: Index) -> Optional[bool]: # type: ignore[no-untyped-def]
|
|
266
266
|
if "supports_index" in properties:
|
|
267
|
-
return properties["supports_index"](cls, index) # type: ignore
|
|
268
|
-
return super(new_class, cls).supports_index(index) # type: ignore
|
|
267
|
+
return properties["supports_index"](cls, index) # type: ignore[no-any-return]
|
|
268
|
+
return super(new_class, cls).supports_index(index) # type: ignore[misc, arg-type, no-any-return]
|
|
269
269
|
|
|
270
270
|
new_class = type(
|
|
271
271
|
class_name,
|
|
@@ -4,12 +4,16 @@ Base implementation for forecasting feature groups.
|
|
|
4
4
|
|
|
5
5
|
from __future__ import annotations
|
|
6
6
|
|
|
7
|
+
from abc import abstractmethod
|
|
7
8
|
from typing import Any, List, Optional, Set, Type, Union
|
|
8
9
|
|
|
9
10
|
from mloda_core.abstract_plugins.abstract_feature_group import AbstractFeatureGroup
|
|
10
11
|
from mloda_core.abstract_plugins.components.base_artifact import BaseArtifact
|
|
11
12
|
from mloda_core.abstract_plugins.components.feature import Feature
|
|
12
|
-
from mloda_core.abstract_plugins.components.feature_chainer.feature_chain_parser import
|
|
13
|
+
from mloda_core.abstract_plugins.components.feature_chainer.feature_chain_parser import (
|
|
14
|
+
CHAIN_SEPARATOR,
|
|
15
|
+
FeatureChainParser,
|
|
16
|
+
)
|
|
13
17
|
from mloda_core.abstract_plugins.components.feature_name import FeatureName
|
|
14
18
|
from mloda_core.abstract_plugins.components.feature_set import FeatureSet
|
|
15
19
|
from mloda_core.abstract_plugins.components.options import Options
|
|
@@ -30,7 +34,7 @@ class ForecastingFeatureGroup(AbstractFeatureGroup):
|
|
|
30
34
|
|
|
31
35
|
### 1. String-Based Creation
|
|
32
36
|
|
|
33
|
-
Features follow the naming pattern: `{
|
|
37
|
+
Features follow the naming pattern: `{in_features}__{algorithm}_forecast_{horizon}{time_unit}`
|
|
34
38
|
|
|
35
39
|
Examples:
|
|
36
40
|
```python
|
|
@@ -53,7 +57,7 @@ class ForecastingFeatureGroup(AbstractFeatureGroup):
|
|
|
53
57
|
ForecastingFeatureGroup.ALGORITHM: "linear",
|
|
54
58
|
ForecastingFeatureGroup.HORIZON: 7,
|
|
55
59
|
ForecastingFeatureGroup.TIME_UNIT: "day",
|
|
56
|
-
DefaultOptionKeys.
|
|
60
|
+
DefaultOptionKeys.in_features: "sales",
|
|
57
61
|
}
|
|
58
62
|
)
|
|
59
63
|
)
|
|
@@ -66,7 +70,7 @@ class ForecastingFeatureGroup(AbstractFeatureGroup):
|
|
|
66
70
|
- `algorithm`: The forecasting algorithm to use
|
|
67
71
|
- `horizon`: The forecast horizon (number of time units)
|
|
68
72
|
- `time_unit`: The time unit for the horizon
|
|
69
|
-
- `
|
|
73
|
+
- `in_features`: The source feature to generate forecasts for
|
|
70
74
|
|
|
71
75
|
### Group Parameters
|
|
72
76
|
Currently none for ForecastingFeatureGroup. Parameters that affect Feature Group
|
|
@@ -128,7 +132,6 @@ class ForecastingFeatureGroup(AbstractFeatureGroup):
|
|
|
128
132
|
|
|
129
133
|
# Define the prefix pattern for this feature group
|
|
130
134
|
PREFIX_PATTERN = r".*__([\w]+)_forecast_(\d+)([\w]+)$"
|
|
131
|
-
PATTERN = "__"
|
|
132
135
|
|
|
133
136
|
# Property mapping for configuration-based features with group/context separation
|
|
134
137
|
PROPERTY_MAPPING = {
|
|
@@ -151,7 +154,7 @@ class ForecastingFeatureGroup(AbstractFeatureGroup):
|
|
|
151
154
|
DefaultOptionKeys.mloda_context: True,
|
|
152
155
|
DefaultOptionKeys.mloda_strict_validation: True,
|
|
153
156
|
},
|
|
154
|
-
DefaultOptionKeys.
|
|
157
|
+
DefaultOptionKeys.in_features: {
|
|
155
158
|
"explanation": "Source feature to generate forecasts for",
|
|
156
159
|
DefaultOptionKeys.mloda_context: True,
|
|
157
160
|
DefaultOptionKeys.mloda_strict_validation: False,
|
|
@@ -202,13 +205,13 @@ class ForecastingFeatureGroup(AbstractFeatureGroup):
|
|
|
202
205
|
source_feature: str | None = None
|
|
203
206
|
|
|
204
207
|
# Try string-based parsing first
|
|
205
|
-
_, source_feature = FeatureChainParser.parse_feature_name(feature_name,
|
|
208
|
+
_, source_feature = FeatureChainParser.parse_feature_name(feature_name, [self.PREFIX_PATTERN])
|
|
206
209
|
if source_feature is not None:
|
|
207
210
|
time_filter_feature = Feature(self.get_time_filter_feature(options))
|
|
208
211
|
return {Feature(source_feature), time_filter_feature}
|
|
209
212
|
|
|
210
213
|
# Fall back to configuration-based approach
|
|
211
|
-
source_features = options.
|
|
214
|
+
source_features = options.get_in_features()
|
|
212
215
|
if len(source_features) != 1:
|
|
213
216
|
raise ValueError(
|
|
214
217
|
f"Expected exactly one source feature, but found {len(source_features)}: {source_features}"
|
|
@@ -246,7 +249,7 @@ class ForecastingFeatureGroup(AbstractFeatureGroup):
|
|
|
246
249
|
if len(parts) < 3 or parts[1] != "forecast":
|
|
247
250
|
raise ValueError(
|
|
248
251
|
f"Invalid forecast feature name format: {feature_name}. "
|
|
249
|
-
f"Expected format: {{
|
|
252
|
+
f"Expected format: {{in_features}}__{{algorithm}}_forecast_{{horizon}}{{time_unit}}"
|
|
250
253
|
)
|
|
251
254
|
|
|
252
255
|
algorithm = parts[0]
|
|
@@ -297,7 +300,6 @@ class ForecastingFeatureGroup(AbstractFeatureGroup):
|
|
|
297
300
|
feature_name,
|
|
298
301
|
options,
|
|
299
302
|
property_mapping=cls.PROPERTY_MAPPING,
|
|
300
|
-
pattern=cls.PATTERN,
|
|
301
303
|
prefix_patterns=[cls.PREFIX_PATTERN],
|
|
302
304
|
)
|
|
303
305
|
|
|
@@ -306,7 +308,7 @@ class ForecastingFeatureGroup(AbstractFeatureGroup):
|
|
|
306
308
|
feature_name_str = feature_name.name if isinstance(feature_name, FeatureName) else feature_name
|
|
307
309
|
|
|
308
310
|
# Check if this is a string-based feature (contains the pattern)
|
|
309
|
-
if
|
|
311
|
+
if FeatureChainParser.is_chained_feature(feature_name_str):
|
|
310
312
|
try:
|
|
311
313
|
# Use existing validation logic that validates algorithm, horizon, and time_unit
|
|
312
314
|
cls.parse_forecast_suffix(feature_name_str)
|
|
@@ -350,13 +352,13 @@ class ForecastingFeatureGroup(AbstractFeatureGroup):
|
|
|
350
352
|
|
|
351
353
|
# Process each requested feature with the original clean data
|
|
352
354
|
for feature in features.features:
|
|
353
|
-
algorithm, horizon, time_unit,
|
|
355
|
+
algorithm, horizon, time_unit, in_features = cls._extract_forecasting_parameters(feature)
|
|
354
356
|
|
|
355
357
|
# Resolve multi-column features automatically
|
|
356
|
-
# If
|
|
358
|
+
# If in_features is "onehot_encoded__product", this discovers
|
|
357
359
|
# ["onehot_encoded__product~0", "onehot_encoded__product~1", ...]
|
|
358
360
|
available_columns = cls._get_available_columns(original_data)
|
|
359
|
-
resolved_columns = cls.resolve_multi_column_feature(
|
|
361
|
+
resolved_columns = cls.resolve_multi_column_feature(in_features, available_columns)
|
|
360
362
|
|
|
361
363
|
# Check that resolved columns exist
|
|
362
364
|
cls._check_source_features_exist(original_data, resolved_columns)
|
|
@@ -428,14 +430,15 @@ class ForecastingFeatureGroup(AbstractFeatureGroup):
|
|
|
428
430
|
# Try string-based parsing first
|
|
429
431
|
feature_name_str = feature.name.name if hasattr(feature.name, "name") else str(feature.name)
|
|
430
432
|
|
|
431
|
-
if
|
|
433
|
+
if FeatureChainParser.is_chained_feature(feature_name_str):
|
|
432
434
|
algorithm, horizon, time_unit = cls.parse_forecast_suffix(feature_name_str)
|
|
433
|
-
|
|
434
|
-
|
|
435
|
+
|
|
436
|
+
# Extract source feature name (everything before the last double underscore)
|
|
437
|
+
source_feature_name = feature_name_str.rsplit(CHAIN_SEPARATOR, 1)[0]
|
|
435
438
|
return algorithm, horizon, time_unit, source_feature_name
|
|
436
439
|
|
|
437
440
|
# Fall back to configuration-based approach
|
|
438
|
-
source_features = feature.options.
|
|
441
|
+
source_features = feature.options.get_in_features()
|
|
439
442
|
source_feature = next(iter(source_features))
|
|
440
443
|
source_feature_name = source_feature.get_name()
|
|
441
444
|
|
|
@@ -466,6 +469,7 @@ class ForecastingFeatureGroup(AbstractFeatureGroup):
|
|
|
466
469
|
return algorithm, horizon, time_unit, source_feature_name
|
|
467
470
|
|
|
468
471
|
@classmethod
|
|
472
|
+
@abstractmethod
|
|
469
473
|
def _check_time_filter_feature_exists(cls, data: Any, time_filter_feature: str) -> None:
|
|
470
474
|
"""
|
|
471
475
|
Check if the time filter feature exists in the data.
|
|
@@ -477,9 +481,10 @@ class ForecastingFeatureGroup(AbstractFeatureGroup):
|
|
|
477
481
|
Raises:
|
|
478
482
|
ValueError: If the time filter feature does not exist in the data
|
|
479
483
|
"""
|
|
480
|
-
|
|
484
|
+
...
|
|
481
485
|
|
|
482
486
|
@classmethod
|
|
487
|
+
@abstractmethod
|
|
483
488
|
def _check_time_filter_feature_is_datetime(cls, data: Any, time_filter_feature: str) -> None:
|
|
484
489
|
"""
|
|
485
490
|
Check if the time filter feature is a datetime column.
|
|
@@ -491,9 +496,10 @@ class ForecastingFeatureGroup(AbstractFeatureGroup):
|
|
|
491
496
|
Raises:
|
|
492
497
|
ValueError: If the time filter feature is not a datetime column
|
|
493
498
|
"""
|
|
494
|
-
|
|
499
|
+
...
|
|
495
500
|
|
|
496
501
|
@classmethod
|
|
502
|
+
@abstractmethod
|
|
497
503
|
def _get_available_columns(cls, data: Any) -> Set[str]:
|
|
498
504
|
"""
|
|
499
505
|
Get the set of available column names from the data.
|
|
@@ -504,9 +510,10 @@ class ForecastingFeatureGroup(AbstractFeatureGroup):
|
|
|
504
510
|
Returns:
|
|
505
511
|
Set of column names available in the data
|
|
506
512
|
"""
|
|
507
|
-
|
|
513
|
+
...
|
|
508
514
|
|
|
509
515
|
@classmethod
|
|
516
|
+
@abstractmethod
|
|
510
517
|
def _check_source_features_exist(cls, data: Any, feature_names: List[str]) -> None:
|
|
511
518
|
"""
|
|
512
519
|
Check if the resolved source features exist in the data.
|
|
@@ -518,9 +525,10 @@ class ForecastingFeatureGroup(AbstractFeatureGroup):
|
|
|
518
525
|
Raises:
|
|
519
526
|
ValueError: If none of the features exist in the data
|
|
520
527
|
"""
|
|
521
|
-
|
|
528
|
+
...
|
|
522
529
|
|
|
523
530
|
@classmethod
|
|
531
|
+
@abstractmethod
|
|
524
532
|
def _add_result_to_data(cls, data: Any, feature_name: str, result: Any) -> Any:
|
|
525
533
|
"""
|
|
526
534
|
Add the result to the data.
|
|
@@ -533,16 +541,17 @@ class ForecastingFeatureGroup(AbstractFeatureGroup):
|
|
|
533
541
|
Returns:
|
|
534
542
|
The updated data
|
|
535
543
|
"""
|
|
536
|
-
|
|
544
|
+
...
|
|
537
545
|
|
|
538
546
|
@classmethod
|
|
547
|
+
@abstractmethod
|
|
539
548
|
def _perform_forecasting(
|
|
540
549
|
cls,
|
|
541
550
|
data: Any,
|
|
542
551
|
algorithm: str,
|
|
543
552
|
horizon: int,
|
|
544
553
|
time_unit: str,
|
|
545
|
-
|
|
554
|
+
in_features: List[str],
|
|
546
555
|
time_filter_feature: str,
|
|
547
556
|
model_artifact: Optional[Any] = None,
|
|
548
557
|
) -> tuple[Any, Optional[Any]]:
|
|
@@ -558,23 +567,24 @@ class ForecastingFeatureGroup(AbstractFeatureGroup):
|
|
|
558
567
|
algorithm: The forecasting algorithm to use
|
|
559
568
|
horizon: The forecast horizon
|
|
560
569
|
time_unit: The time unit for the horizon
|
|
561
|
-
|
|
570
|
+
in_features: List of resolved source feature names to forecast
|
|
562
571
|
time_filter_feature: The name of the time filter feature
|
|
563
572
|
model_artifact: Optional artifact containing a trained model
|
|
564
573
|
|
|
565
574
|
Returns:
|
|
566
575
|
A tuple containing (forecast_result, updated_artifact)
|
|
567
576
|
"""
|
|
568
|
-
|
|
577
|
+
...
|
|
569
578
|
|
|
570
579
|
@classmethod
|
|
580
|
+
@abstractmethod
|
|
571
581
|
def _perform_forecasting_with_confidence(
|
|
572
582
|
cls,
|
|
573
583
|
data: Any,
|
|
574
584
|
algorithm: str,
|
|
575
585
|
horizon: int,
|
|
576
586
|
time_unit: str,
|
|
577
|
-
|
|
587
|
+
in_features: List[str],
|
|
578
588
|
time_filter_feature: str,
|
|
579
589
|
model_artifact: Optional[Any] = None,
|
|
580
590
|
) -> tuple[Any, Any, Any, Optional[Any]]:
|
|
@@ -588,7 +598,7 @@ class ForecastingFeatureGroup(AbstractFeatureGroup):
|
|
|
588
598
|
algorithm: The forecasting algorithm to use
|
|
589
599
|
horizon: The forecast horizon
|
|
590
600
|
time_unit: The time unit for the horizon
|
|
591
|
-
|
|
601
|
+
in_features: List of resolved source feature names to forecast
|
|
592
602
|
time_filter_feature: The name of the time filter feature
|
|
593
603
|
model_artifact: Optional artifact containing a trained model
|
|
594
604
|
|
|
@@ -599,4 +609,4 @@ class ForecastingFeatureGroup(AbstractFeatureGroup):
|
|
|
599
609
|
- upper_bound: The upper confidence bound
|
|
600
610
|
- updated_artifact: The updated artifact (or None)
|
|
601
611
|
"""
|
|
602
|
-
|
|
612
|
+
...
|
|
@@ -28,7 +28,7 @@ except ImportError:
|
|
|
28
28
|
|
|
29
29
|
|
|
30
30
|
from mloda_core.abstract_plugins.compute_frame_work import ComputeFrameWork
|
|
31
|
-
from mloda_plugins.compute_framework.base_implementations.pandas.dataframe import
|
|
31
|
+
from mloda_plugins.compute_framework.base_implementations.pandas.dataframe import PandasDataFrame
|
|
32
32
|
from mloda_plugins.feature_group.experimental.forecasting.base import ForecastingFeatureGroup
|
|
33
33
|
|
|
34
34
|
|
|
@@ -36,7 +36,7 @@ class PandasForecastingFeatureGroup(ForecastingFeatureGroup):
|
|
|
36
36
|
@classmethod
|
|
37
37
|
def compute_framework_rule(cls) -> set[type[ComputeFrameWork]]:
|
|
38
38
|
"""Define the compute framework for this feature group."""
|
|
39
|
-
return {
|
|
39
|
+
return {PandasDataFrame}
|
|
40
40
|
|
|
41
41
|
@classmethod
|
|
42
42
|
def _get_available_columns(cls, data: pd.DataFrame) -> Set[str]:
|
|
@@ -120,7 +120,7 @@ class PandasForecastingFeatureGroup(ForecastingFeatureGroup):
|
|
|
120
120
|
algorithm: str,
|
|
121
121
|
horizon: int,
|
|
122
122
|
time_unit: str,
|
|
123
|
-
|
|
123
|
+
in_features: List[str],
|
|
124
124
|
time_filter_feature: str,
|
|
125
125
|
model_artifact: Optional[Any] = None,
|
|
126
126
|
) -> Tuple[pd.Series, Dict[str, Any]]:
|
|
@@ -142,7 +142,7 @@ class PandasForecastingFeatureGroup(ForecastingFeatureGroup):
|
|
|
142
142
|
algorithm: The forecasting algorithm to use
|
|
143
143
|
horizon: The forecast horizon
|
|
144
144
|
time_unit: The time unit for the horizon
|
|
145
|
-
|
|
145
|
+
in_features: List of resolved source feature names to forecast
|
|
146
146
|
time_filter_feature: The name of the time filter feature
|
|
147
147
|
model_artifact: Optional artifact containing a trained model
|
|
148
148
|
|
|
@@ -173,7 +173,7 @@ class PandasForecastingFeatureGroup(ForecastingFeatureGroup):
|
|
|
173
173
|
# For multi-column features, we need to handle each column separately or aggregate them
|
|
174
174
|
# For now, we'll use the first column for single-column behavior
|
|
175
175
|
# In the future, this could be extended to forecast multiple columns or aggregated columns
|
|
176
|
-
source_feature_name =
|
|
176
|
+
source_feature_name = in_features[0] if len(in_features) == 1 else in_features[0]
|
|
177
177
|
|
|
178
178
|
# Create or load the model
|
|
179
179
|
if model_artifact is None:
|
|
@@ -314,14 +314,14 @@ class PandasForecastingFeatureGroup(ForecastingFeatureGroup):
|
|
|
314
314
|
|
|
315
315
|
@classmethod
|
|
316
316
|
def _create_features(
|
|
317
|
-
cls, df: pd.DataFrame,
|
|
317
|
+
cls, df: pd.DataFrame, in_features: str, time_filter_feature: str, lag_features: List[int]
|
|
318
318
|
) -> Tuple[pd.DataFrame, pd.Series]:
|
|
319
319
|
"""
|
|
320
320
|
Create features for training the forecasting model.
|
|
321
321
|
|
|
322
322
|
Args:
|
|
323
323
|
df: The pandas DataFrame
|
|
324
|
-
|
|
324
|
+
in_features: The name of the source feature
|
|
325
325
|
time_filter_feature: The name of the time filter feature
|
|
326
326
|
lag_features: List of lag periods to use
|
|
327
327
|
|
|
@@ -332,13 +332,13 @@ class PandasForecastingFeatureGroup(ForecastingFeatureGroup):
|
|
|
332
332
|
df_features = df.copy()
|
|
333
333
|
|
|
334
334
|
# Extract target variable
|
|
335
|
-
y = df_features[
|
|
335
|
+
y = df_features[in_features]
|
|
336
336
|
|
|
337
337
|
# Create time-based features
|
|
338
338
|
df_features = cls._create_time_features(df_features, time_filter_feature)
|
|
339
339
|
|
|
340
340
|
# Create lag features (previous values)
|
|
341
|
-
df_features = cls._create_lag_features(df_features,
|
|
341
|
+
df_features = cls._create_lag_features(df_features, in_features, lags=lag_features)
|
|
342
342
|
|
|
343
343
|
# Drop rows with NaN values (from lag features)
|
|
344
344
|
df_features = df_features.dropna()
|
|
@@ -353,7 +353,7 @@ class PandasForecastingFeatureGroup(ForecastingFeatureGroup):
|
|
|
353
353
|
)
|
|
354
354
|
|
|
355
355
|
# Drop the original source feature and time filter feature
|
|
356
|
-
X = df_features.drop([
|
|
356
|
+
X = df_features.drop([in_features, time_filter_feature], axis=1)
|
|
357
357
|
|
|
358
358
|
return X, y
|
|
359
359
|
|
|
@@ -420,7 +420,7 @@ class PandasForecastingFeatureGroup(ForecastingFeatureGroup):
|
|
|
420
420
|
cls,
|
|
421
421
|
df: pd.DataFrame,
|
|
422
422
|
future_timestamps: List[datetime],
|
|
423
|
-
|
|
423
|
+
in_features: str,
|
|
424
424
|
time_filter_feature: str,
|
|
425
425
|
lag_features: List[int],
|
|
426
426
|
) -> pd.DataFrame:
|
|
@@ -430,7 +430,7 @@ class PandasForecastingFeatureGroup(ForecastingFeatureGroup):
|
|
|
430
430
|
Args:
|
|
431
431
|
df: The pandas DataFrame with historical data
|
|
432
432
|
future_timestamps: List of future timestamps to create features for
|
|
433
|
-
|
|
433
|
+
in_features: The name of the source feature
|
|
434
434
|
time_filter_feature: The name of the time filter feature
|
|
435
435
|
lag_features: List of lag periods to use
|
|
436
436
|
|
|
@@ -446,7 +446,7 @@ class PandasForecastingFeatureGroup(ForecastingFeatureGroup):
|
|
|
446
446
|
# Get the most recent values for lag features
|
|
447
447
|
max_lag = max(lag_features)
|
|
448
448
|
available_values = min(len(df), max_lag)
|
|
449
|
-
last_values = df[
|
|
449
|
+
last_values = df[in_features].iloc[-available_values:].tolist()
|
|
450
450
|
last_values.reverse() # Reverse to get [t-n, ..., t-2, t-1]
|
|
451
451
|
|
|
452
452
|
# Pad with the last value if we don't have enough history
|
|
@@ -457,9 +457,9 @@ class PandasForecastingFeatureGroup(ForecastingFeatureGroup):
|
|
|
457
457
|
for lag in lag_features:
|
|
458
458
|
lag_index = lag - 1 # Convert lag to index (lag 1 = index 0)
|
|
459
459
|
if lag_index < len(last_values):
|
|
460
|
-
future_df[f"{
|
|
460
|
+
future_df[f"{in_features}_lag_{lag}"] = last_values[lag_index]
|
|
461
461
|
else:
|
|
462
|
-
future_df[f"{
|
|
462
|
+
future_df[f"{in_features}_lag_{lag}"] = last_values[-1]
|
|
463
463
|
|
|
464
464
|
# Drop the time filter feature
|
|
465
465
|
future_df = future_df.drop([time_filter_feature], axis=1)
|
|
@@ -513,7 +513,7 @@ class PandasForecastingFeatureGroup(ForecastingFeatureGroup):
|
|
|
513
513
|
algorithm: str,
|
|
514
514
|
horizon: int,
|
|
515
515
|
time_unit: str,
|
|
516
|
-
|
|
516
|
+
in_features: List[str],
|
|
517
517
|
time_filter_feature: str,
|
|
518
518
|
model_artifact: Optional[Any] = None,
|
|
519
519
|
) -> Tuple[pd.Series, pd.Series, pd.Series, Dict[str, Any]]:
|
|
@@ -531,7 +531,7 @@ class PandasForecastingFeatureGroup(ForecastingFeatureGroup):
|
|
|
531
531
|
algorithm: The forecasting algorithm to use
|
|
532
532
|
horizon: The forecast horizon
|
|
533
533
|
time_unit: The time unit for the horizon
|
|
534
|
-
|
|
534
|
+
in_features: List of resolved source feature names to forecast
|
|
535
535
|
time_filter_feature: The name of the time filter feature
|
|
536
536
|
model_artifact: Optional artifact containing a trained model
|
|
537
537
|
|
|
@@ -560,7 +560,7 @@ class PandasForecastingFeatureGroup(ForecastingFeatureGroup):
|
|
|
560
560
|
lag_features = cls._determine_lag_features(horizon, time_unit, len(df))
|
|
561
561
|
|
|
562
562
|
# For multi-column features, use the first column
|
|
563
|
-
source_feature_name =
|
|
563
|
+
source_feature_name = in_features[0] if len(in_features) == 1 else in_features[0]
|
|
564
564
|
|
|
565
565
|
# Create or load the model
|
|
566
566
|
if model_artifact is None:
|