PyPI - mloda - Versions diffs - 0.3.0__py3-none-any.whl → 0.3.2__py3-none-any.whl - Mend

mloda 0.3.0__py3-none-any.whl → 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (92) hide show

mloda_plugins/feature_group/experimental/dimensionality_reduction/pandas.py CHANGED Viewed

@@ -4,15 +4,17 @@ Pandas implementation for dimensionality reduction feature groups.
 from __future__ import annotations
-from typing import Any, List, cast
+from typing import Any, List, TYPE_CHECKING, cast
+if TYPE_CHECKING:
+    from numpy.typing import NDArray
 try:
     import pandas as pd
     import numpy as np
 except ImportError:
     pd = None
-    np = None  # type: ignore
+    np = None  # type: ignore[assignment]
 # Check if required packages are available
 SKLEARN_AVAILABLE = True
@@ -26,7 +28,7 @@ except ImportError:
 from mloda_core.abstract_plugins.compute_frame_work import ComputeFrameWork
-from mloda_plugins.compute_framework.base_implementations.pandas.dataframe import PandasDataframe
+from mloda_plugins.compute_framework.base_implementations.pandas.dataframe import PandasDataFrame
 from mloda_plugins.feature_group.experimental.dimensionality_reduction.base import DimensionalityReductionFeatureGroup
@@ -34,7 +36,7 @@ class PandasDimensionalityReductionFeatureGroup(DimensionalityReductionFeatureGr
     @classmethod
     def compute_framework_rule(cls) -> set[type[ComputeFrameWork]]:
         """Define the compute framework for this feature group."""
-        return {PandasDataframe}
+        return {PandasDataFrame}
     @classmethod
     def _check_source_feature_exists(cls, data: pd.DataFrame, feature_name: str) -> None:
@@ -52,7 +54,7 @@ class PandasDimensionalityReductionFeatureGroup(DimensionalityReductionFeatureGr
             raise ValueError(f"Feature '{feature_name}' not found in the data")
     @classmethod
-    def _add_result_to_data(cls, data: pd.DataFrame, feature_name: str, result: np.ndarray) -> pd.DataFrame:  # type: ignore
+    def _add_result_to_data(cls, data: "pd.DataFrame", feature_name: str, result: "NDArray[Any]") -> "pd.DataFrame":
         """
         Add the dimensionality reduction result to the DataFrame using the multiple result columns pattern.
@@ -83,7 +85,7 @@ class PandasDimensionalityReductionFeatureGroup(DimensionalityReductionFeatureGr
         dimension: int,
         source_features: List[str],
         options: Any,
-    ) -> np.ndarray:  # type: ignore
+    ) -> "NDArray[Any]":
         """
         Perform dimensionality reduction on the specified features.
@@ -184,7 +186,7 @@ class PandasDimensionalityReductionFeatureGroup(DimensionalityReductionFeatureGr
             raise ValueError(f"Unsupported dimensionality reduction algorithm: {algorithm}")
     @classmethod
-    def _perform_pca_reduction(cls, X: np.ndarray, dimension: int, svd_solver: str = "auto") -> np.ndarray:  # type: ignore
+    def _perform_pca_reduction(cls, X: "NDArray[Any]", dimension: int, svd_solver: str = "auto") -> "NDArray[Any]":
         """
         Perform Principal Component Analysis (PCA).
@@ -202,17 +204,17 @@ class PandasDimensionalityReductionFeatureGroup(DimensionalityReductionFeatureGr
         # Perform PCA
         pca = PCA(n_components=dimension, random_state=42, svd_solver=svd_solver)
-        return pca.fit_transform(X)  # type: ignore
+        return cast("NDArray[Any]", pca.fit_transform(X))
     @classmethod
     def _perform_tsne_reduction(
         cls,
-        X: np.ndarray,  # type: ignore
+        X: "NDArray[Any]",
         dimension: int,
         max_iter: int = 250,
         n_iter_without_progress: int = 50,
         method: str = "barnes_hut",
-    ) -> np.ndarray:  # type: ignore
+    ) -> "NDArray[Any]":
         """
         Perform t-Distributed Stochastic Neighbor Embedding (t-SNE).
@@ -254,10 +256,10 @@ class PandasDimensionalityReductionFeatureGroup(DimensionalityReductionFeatureGr
             n_iter_without_progress=n_iter_without_progress,
             method=actual_method,
         )
-        return tsne.fit_transform(X)  # type: ignore
+        return cast("NDArray[Any]", tsne.fit_transform(X))
     @classmethod
-    def _perform_ica_reduction(cls, X: np.ndarray, dimension: int, max_iter: int = 200) -> np.ndarray:  # type: ignore
+    def _perform_ica_reduction(cls, X: "NDArray[Any]", dimension: int, max_iter: int = 200) -> "NDArray[Any]":
         """
         Perform Independent Component Analysis (ICA).
@@ -273,12 +275,22 @@ class PandasDimensionalityReductionFeatureGroup(DimensionalityReductionFeatureGr
         if not SKLEARN_AVAILABLE:
             raise ImportError("scikit-learn is required for ICA dimensionality reduction")
+        # For small datasets, increase tolerance and iterations for better convergence
+        n_samples = X.shape[0]
+        if n_samples < 50:
+            # Small datasets may need more iterations and higher tolerance
+            actual_max_iter = max(max_iter, 1000)
+            tol = 0.01
+        else:
+            actual_max_iter = max_iter
+            tol = 1e-4  # sklearn default
         # Perform ICA
-        ica = FastICA(n_components=dimension, random_state=42, max_iter=max_iter)
-        return ica.fit_transform(X)  # type: ignore
+        ica = FastICA(n_components=dimension, random_state=42, max_iter=actual_max_iter, tol=tol)
+        return cast("NDArray[Any]", ica.fit_transform(X))
     @classmethod
-    def _perform_lda_reduction(cls, X: np.ndarray, dimension: int, df: pd.DataFrame) -> np.ndarray:  # type: ignore
+    def _perform_lda_reduction(cls, X: "NDArray[Any]", dimension: int, df: "pd.DataFrame") -> "NDArray[Any]":
         """
         Perform Linear Discriminant Analysis (LDA).
@@ -307,10 +319,10 @@ class PandasDimensionalityReductionFeatureGroup(DimensionalityReductionFeatureGr
         # Perform LDA
         lda = LinearDiscriminantAnalysis(n_components=dimension)
-        return lda.fit_transform(X, y)  # type: ignore
+        return cast("NDArray[Any]", lda.fit_transform(X, y))
     @classmethod
-    def _perform_isomap_reduction(cls, X: np.ndarray, dimension: int, n_neighbors: int = 5) -> np.ndarray:  # type: ignore
+    def _perform_isomap_reduction(cls, X: "NDArray[Any]", dimension: int, n_neighbors: int = 5) -> "NDArray[Any]":
         """
         Perform Isometric Mapping (Isomap).
@@ -328,4 +340,4 @@ class PandasDimensionalityReductionFeatureGroup(DimensionalityReductionFeatureGr
         # Perform Isomap
         isomap = Isomap(n_components=dimension, n_neighbors=n_neighbors)
-        return isomap.fit_transform(X)  # type: ignore
+        return cast("NDArray[Any]", isomap.fit_transform(X))

mloda_plugins/feature_group/experimental/dynamic_feature_group_factory/dynamic_feature_group_factory.py CHANGED Viewed

@@ -141,7 +141,7 @@ class DynamicFeatureGroupCreator:
     properties = {
         "match_feature_group_criteria": custom_match_criteria,
         "input_features": custom_input_features,
-        "compute_framework_rule": lambda: {PandasDataframe},
+        "compute_framework_rule": lambda: {PandasDataFrame},
     }
     CustomFG = DynamicFeatureGroupCreator.create(
@@ -202,70 +202,70 @@ class DynamicFeatureGroupCreator:
         if class_name in DynamicFeatureGroupCreator._created_classes:
             return DynamicFeatureGroupCreator._created_classes[class_name]
-        def set_feature_name(self, config: Options, feature_name: FeatureName) -> FeatureName:  # type: ignore
+        def set_feature_name(self, config: Options, feature_name: FeatureName) -> FeatureName:  # type: ignore[no-untyped-def]
             if "set_feature_name" in properties:
-                return properties["set_feature_name"](self, config, feature_name)  # type: ignore
+                return properties["set_feature_name"](self, config, feature_name)  # type: ignore[no-any-return]
             return feature_name
-        def match_feature_group_criteria(  # type: ignore
+        def match_feature_group_criteria(  # type: ignore[no-untyped-def]
             cls,
             feature_name: Union[FeatureName, str],
             options: Options,
             data_access_collection: Optional[DataAccessCollection] = None,
         ) -> bool:
             if "match_feature_group_criteria" in properties:
-                return properties["match_feature_group_criteria"](cls, feature_name, options, data_access_collection)  # type: ignore
-            return super(new_class, cls).match_feature_group_criteria(feature_name, options, data_access_collection)  # type: ignore
+                return properties["match_feature_group_criteria"](cls, feature_name, options, data_access_collection)  # type: ignore[no-any-return]
+            return super(new_class, cls).match_feature_group_criteria(feature_name, options, data_access_collection)  # type: ignore[misc, arg-type, no-any-return]
-        def input_data(cls) -> Optional[BaseInputData]:  # type: ignore
+        def input_data(cls) -> Optional[BaseInputData]:  # type: ignore[no-untyped-def]
             if "input_data" in properties:
-                return properties["input_data"]()  # type: ignore
-            return super(new_class, cls).input_data()  # type: ignore
+                return properties["input_data"]()  # type: ignore[no-any-return]
+            return super(new_class, cls).input_data()  # type: ignore[misc, arg-type, no-any-return]
-        def validate_input_features(cls, data: Any, features: FeatureSet) -> Optional[bool]:  # type: ignore
+        def validate_input_features(cls, data: Any, features: FeatureSet) -> Optional[bool]:  # type: ignore[no-untyped-def]
             if "validate_input_features" in properties:
-                return properties["validate_input_features"](cls, data, features)  # type: ignore
-            return super(new_class, cls).validate_input_features(data, features)  # type: ignore
+                return properties["validate_input_features"](cls, data, features)  # type: ignore[no-any-return]
+            return super(new_class, cls).validate_input_features(data, features)  # type: ignore[misc, arg-type, no-any-return]
-        def calculate_feature(cls, data: Any, features: FeatureSet) -> Any:  # type: ignore
+        def calculate_feature(cls, data: Any, features: FeatureSet) -> Any:  # type: ignore[no-untyped-def]
             if "calculate_feature" in properties:
                 return properties["calculate_feature"](cls, data, features)
-            return super(new_class, cls).calculate_feature(data, features)  # type: ignore
+            return super(new_class, cls).calculate_feature(data, features)  # type: ignore[misc, arg-type]
-        def validate_output_features(cls, data: Any, features: FeatureSet) -> Optional[bool]:  # type: ignore
+        def validate_output_features(cls, data: Any, features: FeatureSet) -> Optional[bool]:  # type: ignore[no-untyped-def]
             if "validate_output_features" in properties:
-                return properties["validate_output_features"](cls, data, features)  # type: ignore
-            return super(new_class, cls).validate_output_features(data, features)  # type: ignore
+                return properties["validate_output_features"](cls, data, features)  # type: ignore[no-any-return]
+            return super(new_class, cls).validate_output_features(data, features)  # type: ignore[misc, arg-type, no-any-return]
-        def artifact(cls) -> Optional[Type[Any]]:  # type: ignore
+        def artifact(cls) -> Optional[Type[Any]]:  # type: ignore[no-untyped-def]
             if "artifact" in properties:
-                return properties["artifact"]()  # type: ignore
-            return super(new_class, cls).artifact()  # type: ignore
+                return properties["artifact"]()  # type: ignore[no-any-return]
+            return super(new_class, cls).artifact()  # type: ignore[misc, arg-type, no-any-return]
-        def compute_framework_rule(cls) -> Union[bool, Set[Type[ComputeFrameWork]]]:  # type: ignore
+        def compute_framework_rule(cls) -> Union[bool, Set[Type[ComputeFrameWork]]]:  # type: ignore[no-untyped-def]
             if "compute_framework_rule" in properties:
-                return properties["compute_framework_rule"]()  # type: ignore
-            return super(new_class, cls).compute_framework_rule()  # type: ignore
+                return properties["compute_framework_rule"]()  # type: ignore[no-any-return]
+            return super(new_class, cls).compute_framework_rule()  # type: ignore[misc, arg-type, no-any-return]
-        def return_data_type_rule(cls, feature: Any) -> Optional[DataType]:  # type: ignore
+        def return_data_type_rule(cls, feature: Any) -> Optional[DataType]:  # type: ignore[no-untyped-def]
             if "return_data_type_rule" in properties:
-                return properties["return_data_type_rule"](cls, feature)  # type: ignore
-            return super(new_class, cls).return_data_type_rule(feature)  # type: ignore
+                return properties["return_data_type_rule"](cls, feature)  # type: ignore[no-any-return]
+            return super(new_class, cls).return_data_type_rule(feature)  # type: ignore[misc, arg-type, no-any-return]
-        def input_features(self, options: Options, feature_name: FeatureName) -> Optional[Set[Any]]:  # type: ignore
+        def input_features(self, options: Options, feature_name: FeatureName) -> Optional[Set[Any]]:  # type: ignore[no-untyped-def]
             if "input_features" in properties:
-                return properties["input_features"](self, options, feature_name)  # type: ignore
-            return super(new_class, self).input_features(options, feature_name)  # type: ignore
+                return properties["input_features"](self, options, feature_name)  # type: ignore[no-any-return]
+            return super(new_class, self).input_features(options, feature_name)  # type: ignore[misc, arg-type, no-any-return]
-        def index_columns(cls) -> Optional[List[Index]]:  # type: ignore
+        def index_columns(cls) -> Optional[List[Index]]:  # type: ignore[no-untyped-def]
             if "index_columns" in properties:
-                return properties["index_columns"]()  # type: ignore
-            return super(new_class, cls).index_columns()  # type: ignore
+                return properties["index_columns"]()  # type: ignore[no-any-return]
+            return super(new_class, cls).index_columns()  # type: ignore[misc, arg-type, no-any-return]
-        def supports_index(cls, index: Index) -> Optional[bool]:  # type: ignore
+        def supports_index(cls, index: Index) -> Optional[bool]:  # type: ignore[no-untyped-def]
             if "supports_index" in properties:
-                return properties["supports_index"](cls, index)  # type: ignore
-            return super(new_class, cls).supports_index(index)  # type: ignore
+                return properties["supports_index"](cls, index)  # type: ignore[no-any-return]
+            return super(new_class, cls).supports_index(index)  # type: ignore[misc, arg-type, no-any-return]
         new_class = type(
             class_name,

mloda_plugins/feature_group/experimental/forecasting/base.py CHANGED Viewed

@@ -4,12 +4,16 @@ Base implementation for forecasting feature groups.
 from __future__ import annotations
+from abc import abstractmethod
 from typing import Any, List, Optional, Set, Type, Union
 from mloda_core.abstract_plugins.abstract_feature_group import AbstractFeatureGroup
 from mloda_core.abstract_plugins.components.base_artifact import BaseArtifact
 from mloda_core.abstract_plugins.components.feature import Feature
-from mloda_core.abstract_plugins.components.feature_chainer.feature_chain_parser import FeatureChainParser
+from mloda_core.abstract_plugins.components.feature_chainer.feature_chain_parser import (
+    CHAIN_SEPARATOR,
+    FeatureChainParser,
+)
 from mloda_core.abstract_plugins.components.feature_name import FeatureName
 from mloda_core.abstract_plugins.components.feature_set import FeatureSet
 from mloda_core.abstract_plugins.components.options import Options
@@ -30,7 +34,7 @@ class ForecastingFeatureGroup(AbstractFeatureGroup):
     ### 1. String-Based Creation
-    Features follow the naming pattern: `{mloda_source_features}__{algorithm}_forecast_{horizon}{time_unit}`
+    Features follow the naming pattern: `{in_features}__{algorithm}_forecast_{horizon}{time_unit}`
     Examples:
     ```python
@@ -53,7 +57,7 @@ class ForecastingFeatureGroup(AbstractFeatureGroup):
                 ForecastingFeatureGroup.ALGORITHM: "linear",
                 ForecastingFeatureGroup.HORIZON: 7,
                 ForecastingFeatureGroup.TIME_UNIT: "day",
-                DefaultOptionKeys.mloda_source_features: "sales",
+                DefaultOptionKeys.in_features: "sales",
             }
         )
     )
@@ -66,7 +70,7 @@ class ForecastingFeatureGroup(AbstractFeatureGroup):
     - `algorithm`: The forecasting algorithm to use
     - `horizon`: The forecast horizon (number of time units)
     - `time_unit`: The time unit for the horizon
-    - `mloda_source_features`: The source feature to generate forecasts for
+    - `in_features`: The source feature to generate forecasts for
     ### Group Parameters
     Currently none for ForecastingFeatureGroup. Parameters that affect Feature Group
@@ -128,7 +132,6 @@ class ForecastingFeatureGroup(AbstractFeatureGroup):
     # Define the prefix pattern for this feature group
     PREFIX_PATTERN = r".*__([\w]+)_forecast_(\d+)([\w]+)$"
-    PATTERN = "__"
     # Property mapping for configuration-based features with group/context separation
     PROPERTY_MAPPING = {
@@ -151,7 +154,7 @@ class ForecastingFeatureGroup(AbstractFeatureGroup):
             DefaultOptionKeys.mloda_context: True,
             DefaultOptionKeys.mloda_strict_validation: True,
         },
-        DefaultOptionKeys.mloda_source_features: {
+        DefaultOptionKeys.in_features: {
             "explanation": "Source feature to generate forecasts for",
             DefaultOptionKeys.mloda_context: True,
             DefaultOptionKeys.mloda_strict_validation: False,
@@ -202,13 +205,13 @@ class ForecastingFeatureGroup(AbstractFeatureGroup):
         source_feature: str | None = None
         # Try string-based parsing first
-        _, source_feature = FeatureChainParser.parse_feature_name(feature_name, self.PATTERN, [self.PREFIX_PATTERN])
+        _, source_feature = FeatureChainParser.parse_feature_name(feature_name, [self.PREFIX_PATTERN])
         if source_feature is not None:
             time_filter_feature = Feature(self.get_time_filter_feature(options))
             return {Feature(source_feature), time_filter_feature}
         # Fall back to configuration-based approach
-        source_features = options.get_source_features()
+        source_features = options.get_in_features()
         if len(source_features) != 1:
             raise ValueError(
                 f"Expected exactly one source feature, but found {len(source_features)}: {source_features}"
@@ -246,7 +249,7 @@ class ForecastingFeatureGroup(AbstractFeatureGroup):
         if len(parts) < 3 or parts[1] != "forecast":
             raise ValueError(
                 f"Invalid forecast feature name format: {feature_name}. "
-                f"Expected format: {{mloda_source_features}}__{{algorithm}}_forecast_{{horizon}}{{time_unit}}"
+                f"Expected format: {{in_features}}__{{algorithm}}_forecast_{{horizon}}{{time_unit}}"
             )
         algorithm = parts[0]
@@ -297,7 +300,6 @@ class ForecastingFeatureGroup(AbstractFeatureGroup):
             feature_name,
             options,
             property_mapping=cls.PROPERTY_MAPPING,
-            pattern=cls.PATTERN,
             prefix_patterns=[cls.PREFIX_PATTERN],
         )
@@ -306,7 +308,7 @@ class ForecastingFeatureGroup(AbstractFeatureGroup):
             feature_name_str = feature_name.name if isinstance(feature_name, FeatureName) else feature_name
             # Check if this is a string-based feature (contains the pattern)
-            if cls.PATTERN in feature_name_str:
+            if FeatureChainParser.is_chained_feature(feature_name_str):
                 try:
                     # Use existing validation logic that validates algorithm, horizon, and time_unit
                     cls.parse_forecast_suffix(feature_name_str)
@@ -350,13 +352,13 @@ class ForecastingFeatureGroup(AbstractFeatureGroup):
         # Process each requested feature with the original clean data
         for feature in features.features:
-            algorithm, horizon, time_unit, mloda_source_features = cls._extract_forecasting_parameters(feature)
+            algorithm, horizon, time_unit, in_features = cls._extract_forecasting_parameters(feature)
             # Resolve multi-column features automatically
-            # If mloda_source_features is "onehot_encoded__product", this discovers
+            # If in_features is "onehot_encoded__product", this discovers
             # ["onehot_encoded__product~0", "onehot_encoded__product~1", ...]
             available_columns = cls._get_available_columns(original_data)
-            resolved_columns = cls.resolve_multi_column_feature(mloda_source_features, available_columns)
+            resolved_columns = cls.resolve_multi_column_feature(in_features, available_columns)
             # Check that resolved columns exist
             cls._check_source_features_exist(original_data, resolved_columns)
@@ -428,14 +430,15 @@ class ForecastingFeatureGroup(AbstractFeatureGroup):
         # Try string-based parsing first
         feature_name_str = feature.name.name if hasattr(feature.name, "name") else str(feature.name)
-        if cls.PATTERN in feature_name_str:
+        if FeatureChainParser.is_chained_feature(feature_name_str):
             algorithm, horizon, time_unit = cls.parse_forecast_suffix(feature_name_str)
-            # Extract source feature (everything before the __)
-            source_feature_name = feature_name_str.split(cls.PATTERN)[0]
+            # Extract source feature name (everything before the last double underscore)
+            source_feature_name = feature_name_str.rsplit(CHAIN_SEPARATOR, 1)[0]
             return algorithm, horizon, time_unit, source_feature_name
         # Fall back to configuration-based approach
-        source_features = feature.options.get_source_features()
+        source_features = feature.options.get_in_features()
         source_feature = next(iter(source_features))
         source_feature_name = source_feature.get_name()
@@ -466,6 +469,7 @@ class ForecastingFeatureGroup(AbstractFeatureGroup):
         return algorithm, horizon, time_unit, source_feature_name
     @classmethod
+    @abstractmethod
     def _check_time_filter_feature_exists(cls, data: Any, time_filter_feature: str) -> None:
         """
         Check if the time filter feature exists in the data.
@@ -477,9 +481,10 @@ class ForecastingFeatureGroup(AbstractFeatureGroup):
         Raises:
             ValueError: If the time filter feature does not exist in the data
         """
-        raise NotImplementedError(f"_check_time_filter_feature_exists not implemented in {cls.__name__}")
+        ...
     @classmethod
+    @abstractmethod
     def _check_time_filter_feature_is_datetime(cls, data: Any, time_filter_feature: str) -> None:
         """
         Check if the time filter feature is a datetime column.
@@ -491,9 +496,10 @@ class ForecastingFeatureGroup(AbstractFeatureGroup):
         Raises:
             ValueError: If the time filter feature is not a datetime column
         """
-        raise NotImplementedError(f"_check_time_filter_feature_is_datetime not implemented in {cls.__name__}")
+        ...
     @classmethod
+    @abstractmethod
     def _get_available_columns(cls, data: Any) -> Set[str]:
         """
         Get the set of available column names from the data.
@@ -504,9 +510,10 @@ class ForecastingFeatureGroup(AbstractFeatureGroup):
         Returns:
             Set of column names available in the data
         """
-        raise NotImplementedError(f"_get_available_columns not implemented in {cls.__name__}")
+        ...
     @classmethod
+    @abstractmethod
     def _check_source_features_exist(cls, data: Any, feature_names: List[str]) -> None:
         """
         Check if the resolved source features exist in the data.
@@ -518,9 +525,10 @@ class ForecastingFeatureGroup(AbstractFeatureGroup):
         Raises:
             ValueError: If none of the features exist in the data
         """
-        raise NotImplementedError(f"_check_source_features_exist not implemented in {cls.__name__}")
+        ...
     @classmethod
+    @abstractmethod
     def _add_result_to_data(cls, data: Any, feature_name: str, result: Any) -> Any:
         """
         Add the result to the data.
@@ -533,16 +541,17 @@ class ForecastingFeatureGroup(AbstractFeatureGroup):
         Returns:
             The updated data
         """
-        raise NotImplementedError(f"_add_result_to_data not implemented in {cls.__name__}")
+        ...
     @classmethod
+    @abstractmethod
     def _perform_forecasting(
         cls,
         data: Any,
         algorithm: str,
         horizon: int,
         time_unit: str,
-        mloda_source_features: List[str],
+        in_features: List[str],
         time_filter_feature: str,
         model_artifact: Optional[Any] = None,
     ) -> tuple[Any, Optional[Any]]:
@@ -558,23 +567,24 @@ class ForecastingFeatureGroup(AbstractFeatureGroup):
             algorithm: The forecasting algorithm to use
             horizon: The forecast horizon
             time_unit: The time unit for the horizon
-            mloda_source_features: List of resolved source feature names to forecast
+            in_features: List of resolved source feature names to forecast
             time_filter_feature: The name of the time filter feature
             model_artifact: Optional artifact containing a trained model
         Returns:
             A tuple containing (forecast_result, updated_artifact)
         """
-        raise NotImplementedError(f"_perform_forecasting not implemented in {cls.__name__}")
+        ...
     @classmethod
+    @abstractmethod
     def _perform_forecasting_with_confidence(
         cls,
         data: Any,
         algorithm: str,
         horizon: int,
         time_unit: str,
-        mloda_source_features: List[str],
+        in_features: List[str],
         time_filter_feature: str,
         model_artifact: Optional[Any] = None,
     ) -> tuple[Any, Any, Any, Optional[Any]]:
@@ -588,7 +598,7 @@ class ForecastingFeatureGroup(AbstractFeatureGroup):
             algorithm: The forecasting algorithm to use
             horizon: The forecast horizon
             time_unit: The time unit for the horizon
-            mloda_source_features: List of resolved source feature names to forecast
+            in_features: List of resolved source feature names to forecast
             time_filter_feature: The name of the time filter feature
             model_artifact: Optional artifact containing a trained model
@@ -599,4 +609,4 @@ class ForecastingFeatureGroup(AbstractFeatureGroup):
             - upper_bound: The upper confidence bound
             - updated_artifact: The updated artifact (or None)
         """
-        raise NotImplementedError(f"_perform_forecasting_with_confidence not implemented in {cls.__name__}")
+        ...

mloda_plugins/feature_group/experimental/forecasting/pandas.py CHANGED Viewed

@@ -28,7 +28,7 @@ except ImportError:
 from mloda_core.abstract_plugins.compute_frame_work import ComputeFrameWork
-from mloda_plugins.compute_framework.base_implementations.pandas.dataframe import PandasDataframe
+from mloda_plugins.compute_framework.base_implementations.pandas.dataframe import PandasDataFrame
 from mloda_plugins.feature_group.experimental.forecasting.base import ForecastingFeatureGroup
@@ -36,7 +36,7 @@ class PandasForecastingFeatureGroup(ForecastingFeatureGroup):
     @classmethod
     def compute_framework_rule(cls) -> set[type[ComputeFrameWork]]:
         """Define the compute framework for this feature group."""
-        return {PandasDataframe}
+        return {PandasDataFrame}
     @classmethod
     def _get_available_columns(cls, data: pd.DataFrame) -> Set[str]:
@@ -120,7 +120,7 @@ class PandasForecastingFeatureGroup(ForecastingFeatureGroup):
         algorithm: str,
         horizon: int,
         time_unit: str,
-        mloda_source_features: List[str],
+        in_features: List[str],
         time_filter_feature: str,
         model_artifact: Optional[Any] = None,
     ) -> Tuple[pd.Series, Dict[str, Any]]:
@@ -142,7 +142,7 @@ class PandasForecastingFeatureGroup(ForecastingFeatureGroup):
             algorithm: The forecasting algorithm to use
             horizon: The forecast horizon
             time_unit: The time unit for the horizon
-            mloda_source_features: List of resolved source feature names to forecast
+            in_features: List of resolved source feature names to forecast
             time_filter_feature: The name of the time filter feature
             model_artifact: Optional artifact containing a trained model
@@ -173,7 +173,7 @@ class PandasForecastingFeatureGroup(ForecastingFeatureGroup):
         # For multi-column features, we need to handle each column separately or aggregate them
         # For now, we'll use the first column for single-column behavior
         # In the future, this could be extended to forecast multiple columns or aggregated columns
-        source_feature_name = mloda_source_features[0] if len(mloda_source_features) == 1 else mloda_source_features[0]
+        source_feature_name = in_features[0] if len(in_features) == 1 else in_features[0]
         # Create or load the model
         if model_artifact is None:
@@ -314,14 +314,14 @@ class PandasForecastingFeatureGroup(ForecastingFeatureGroup):
     @classmethod
     def _create_features(
-        cls, df: pd.DataFrame, mloda_source_features: str, time_filter_feature: str, lag_features: List[int]
+        cls, df: pd.DataFrame, in_features: str, time_filter_feature: str, lag_features: List[int]
     ) -> Tuple[pd.DataFrame, pd.Series]:
         """
         Create features for training the forecasting model.
         Args:
             df: The pandas DataFrame
-            mloda_source_features: The name of the source feature
+            in_features: The name of the source feature
             time_filter_feature: The name of the time filter feature
             lag_features: List of lag periods to use
@@ -332,13 +332,13 @@ class PandasForecastingFeatureGroup(ForecastingFeatureGroup):
         df_features = df.copy()
         # Extract target variable
-        y = df_features[mloda_source_features]
+        y = df_features[in_features]
         # Create time-based features
         df_features = cls._create_time_features(df_features, time_filter_feature)
         # Create lag features (previous values)
-        df_features = cls._create_lag_features(df_features, mloda_source_features, lags=lag_features)
+        df_features = cls._create_lag_features(df_features, in_features, lags=lag_features)
         # Drop rows with NaN values (from lag features)
         df_features = df_features.dropna()
@@ -353,7 +353,7 @@ class PandasForecastingFeatureGroup(ForecastingFeatureGroup):
             )
         # Drop the original source feature and time filter feature
-        X = df_features.drop([mloda_source_features, time_filter_feature], axis=1)
+        X = df_features.drop([in_features, time_filter_feature], axis=1)
         return X, y
@@ -420,7 +420,7 @@ class PandasForecastingFeatureGroup(ForecastingFeatureGroup):
         cls,
         df: pd.DataFrame,
         future_timestamps: List[datetime],
-        mloda_source_features: str,
+        in_features: str,
         time_filter_feature: str,
         lag_features: List[int],
     ) -> pd.DataFrame:
@@ -430,7 +430,7 @@ class PandasForecastingFeatureGroup(ForecastingFeatureGroup):
         Args:
             df: The pandas DataFrame with historical data
             future_timestamps: List of future timestamps to create features for
-            mloda_source_features: The name of the source feature
+            in_features: The name of the source feature
             time_filter_feature: The name of the time filter feature
             lag_features: List of lag periods to use
@@ -446,7 +446,7 @@ class PandasForecastingFeatureGroup(ForecastingFeatureGroup):
         # Get the most recent values for lag features
         max_lag = max(lag_features)
         available_values = min(len(df), max_lag)
-        last_values = df[mloda_source_features].iloc[-available_values:].tolist()
+        last_values = df[in_features].iloc[-available_values:].tolist()
         last_values.reverse()  # Reverse to get [t-n, ..., t-2, t-1]
         # Pad with the last value if we don't have enough history
@@ -457,9 +457,9 @@ class PandasForecastingFeatureGroup(ForecastingFeatureGroup):
         for lag in lag_features:
             lag_index = lag - 1  # Convert lag to index (lag 1 = index 0)
             if lag_index < len(last_values):
-                future_df[f"{mloda_source_features}_lag_{lag}"] = last_values[lag_index]
+                future_df[f"{in_features}_lag_{lag}"] = last_values[lag_index]
             else:
-                future_df[f"{mloda_source_features}_lag_{lag}"] = last_values[-1]
+                future_df[f"{in_features}_lag_{lag}"] = last_values[-1]
         # Drop the time filter feature
         future_df = future_df.drop([time_filter_feature], axis=1)
@@ -513,7 +513,7 @@ class PandasForecastingFeatureGroup(ForecastingFeatureGroup):
         algorithm: str,
         horizon: int,
         time_unit: str,
-        mloda_source_features: List[str],
+        in_features: List[str],
         time_filter_feature: str,
         model_artifact: Optional[Any] = None,
     ) -> Tuple[pd.Series, pd.Series, pd.Series, Dict[str, Any]]:
@@ -531,7 +531,7 @@ class PandasForecastingFeatureGroup(ForecastingFeatureGroup):
             algorithm: The forecasting algorithm to use
             horizon: The forecast horizon
             time_unit: The time unit for the horizon
-            mloda_source_features: List of resolved source feature names to forecast
+            in_features: List of resolved source feature names to forecast
             time_filter_feature: The name of the time filter feature
             model_artifact: Optional artifact containing a trained model
@@ -560,7 +560,7 @@ class PandasForecastingFeatureGroup(ForecastingFeatureGroup):
         lag_features = cls._determine_lag_features(horizon, time_unit, len(df))
         # For multi-column features, use the first column
-        source_feature_name = mloda_source_features[0] if len(mloda_source_features) == 1 else mloda_source_features[0]
+        source_feature_name = in_features[0] if len(in_features) == 1 else in_features[0]
         # Create or load the model
         if model_artifact is None:

mloda 0.3.0__py3-none-any.whl → 0.3.2__py3-none-any.whl

mloda 0.3.0__py3-none-any.whl → 0.3.2__py3-none-any.whl