PyPI - mloda - Versions diffs - 0.3.3__py3-none-any.whl → 0.4.1__py3-none-any.whl - Mend

mloda 0.3.3__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (201) hide show

mloda_plugins/feature_group/experimental/sklearn/scaling/base.py CHANGED Viewed

@@ -5,20 +5,21 @@ Base implementation for scikit-learn scaling feature groups.
 from __future__ import annotations
 import datetime
-from typing import Any, Dict, Optional, Set, Type, Union
-from mloda_core.abstract_plugins.abstract_feature_group import AbstractFeatureGroup
-from mloda_core.abstract_plugins.components.feature import Feature
-from mloda_core.abstract_plugins.components.feature_name import FeatureName
-from mloda_core.abstract_plugins.components.feature_set import FeatureSet
-from mloda_core.abstract_plugins.components.options import Options
-from mloda_core.abstract_plugins.components.feature_chainer.feature_chain_parser import FeatureChainParser
-from mloda_core.abstract_plugins.components.base_artifact import BaseArtifact
+from typing import Any, Dict, Optional, Type
+from mloda import FeatureGroup
+from mloda import Feature
+from mloda.provider import FeatureSet
+from mloda.provider import FeatureChainParser
+from mloda.provider import (
+    FeatureChainParserMixin,
+)
+from mloda.provider import BaseArtifact
 from mloda_plugins.feature_group.experimental.default_options_key import DefaultOptionKeys
 from mloda_plugins.feature_group.experimental.sklearn.sklearn_artifact import SklearnArtifact
-class ScalingFeatureGroup(AbstractFeatureGroup):
+class ScalingFeatureGroup(FeatureChainParserMixin, FeatureGroup):
     """
     Base class for scikit-learn scaling feature groups.
@@ -82,17 +83,21 @@ class ScalingFeatureGroup(AbstractFeatureGroup):
     PATTERN = "__"
     PREFIX_PATTERN = r".*__(standard|minmax|robust|normalizer)_scaled$"
+    # In-feature configuration for FeatureChainParserMixin
+    MIN_IN_FEATURES = 1
+    MAX_IN_FEATURES = 1
     # Property mapping for new configuration-based approach
     PROPERTY_MAPPING = {
         SCALER_TYPE: {
             **SUPPORTED_SCALERS,  # All supported scaler types as valid options
-            DefaultOptionKeys.mloda_context: True,  # Context parameter
-            DefaultOptionKeys.mloda_strict_validation: True,  # Enable strict validation
+            DefaultOptionKeys.context: True,  # Context parameter
+            DefaultOptionKeys.strict_validation: True,  # Enable strict validation
         },
         DefaultOptionKeys.in_features: {
             "explanation": "Source feature to scale",
-            DefaultOptionKeys.mloda_context: True,  # Context parameter
-            DefaultOptionKeys.mloda_strict_validation: False,  # Flexible validation
+            DefaultOptionKeys.context: True,  # Context parameter
+            DefaultOptionKeys.strict_validation: False,  # Flexible validation
         },
     }
@@ -101,22 +106,6 @@ class ScalingFeatureGroup(AbstractFeatureGroup):
         """Return the artifact class for sklearn scaler persistence."""
         return SklearnArtifact
-    def input_features(self, options: Options, feature_name: FeatureName) -> Optional[Set[Feature]]:
-        """Extract source feature from either configuration-based options or string parsing."""
-        # Try string-based parsing first
-        _, source_feature = FeatureChainParser.parse_feature_name(feature_name, [self.PREFIX_PATTERN])
-        if source_feature is not None:
-            return {Feature(source_feature)}
-        # Fall back to configuration-based approach
-        source_features = options.get_in_features()
-        if len(source_features) != 1:
-            raise ValueError(
-                f"Expected exactly one source feature, but found {len(source_features)}: {source_features}"
-            )
-        return set(source_features)
     @classmethod
     def get_scaler_type(cls, feature_name: str) -> str:
         """Extract the scaler type from the feature name."""
@@ -133,22 +122,6 @@ class ScalingFeatureGroup(AbstractFeatureGroup):
         return scaler_type
-    @classmethod
-    def match_feature_group_criteria(
-        cls,
-        feature_name: Union[FeatureName, str],
-        options: Options,
-        data_access_collection: Optional[Any] = None,
-    ) -> bool:
-        """Check if feature name matches the expected pattern using unified parser."""
-        # Use the unified parser with property mapping for full configuration support
-        return FeatureChainParser.match_configuration_feature_chain_parser(
-            feature_name,
-            options,
-            property_mapping=cls.PROPERTY_MAPPING,
-            prefix_patterns=[cls.PREFIX_PATTERN],
-        )
     @classmethod
     def calculate_feature(cls, data: Any, features: FeatureSet) -> Any:
         """
@@ -213,33 +186,44 @@ class ScalingFeatureGroup(AbstractFeatureGroup):
         Raises:
             ValueError: If parameters cannot be extracted
         """
-        scaler_type = None
-        source_feature_name: str | None = None
+        source_features = cls._extract_source_features(feature)
+        scaler_type = cls._extract_scaler_type(feature)
+        if scaler_type is None:
+            raise ValueError(f"Could not extract scaler type from: {feature.name}")
+        return scaler_type, source_features[0]
-        # Try string-based parsing first
+    @classmethod
+    def _extract_scaler_type(cls, feature: Feature) -> Optional[str]:
+        """
+        Extract scaler type from a feature.
+        Tries string-based parsing first, falls back to configuration-based approach.
+        Args:
+            feature: The feature to extract scaler type from
+        Returns:
+            The scaler type string
+        Raises:
+            ValueError: If scaler type is unsupported
+        """
         feature_name_str = feature.name.name if hasattr(feature.name, "name") else str(feature.name)
+        # Try string-based parsing first
         if FeatureChainParser.is_chained_feature(feature_name_str):
             scaler_type = cls.get_scaler_type(feature_name_str)
-            source_feature_name = FeatureChainParser.extract_source_feature(feature_name_str, cls.PREFIX_PATTERN)
-            return scaler_type, source_feature_name
+            return scaler_type
         # Fall back to configuration-based approach
-        source_features = feature.options.get_in_features()
-        source_feature = next(iter(source_features))
-        source_feature_name = source_feature.get_name()
         scaler_type = feature.options.get(cls.SCALER_TYPE)
-        if scaler_type is None or source_feature_name is None:
-            raise ValueError(f"Could not extract scaler type and source feature from: {feature.name}")
-        if scaler_type not in cls.SUPPORTED_SCALERS:
+        if scaler_type is not None and scaler_type not in cls.SUPPORTED_SCALERS:
             raise ValueError(
                 f"Unsupported scaler type: {scaler_type}. Supported types: {', '.join(cls.SUPPORTED_SCALERS.keys())}"
             )
-        return scaler_type, source_feature_name
+        return str(scaler_type) if scaler_type is not None else None
     @classmethod
     def _import_sklearn_components(cls) -> Dict[str, Any]:

mloda_plugins/feature_group/experimental/sklearn/scaling/pandas.py CHANGED Viewed

@@ -6,7 +6,7 @@ from __future__ import annotations
 from typing import Any, Set, Type, Union
-from mloda_core.abstract_plugins.compute_frame_work import ComputeFrameWork
+from mloda import ComputeFramework
 from mloda_plugins.compute_framework.base_implementations.pandas.dataframe import PandasDataFrame
 from mloda_plugins.feature_group.experimental.sklearn.scaling.base import ScalingFeatureGroup
@@ -21,7 +21,7 @@ class PandasScalingFeatureGroup(ScalingFeatureGroup):
     """
     @classmethod
-    def compute_framework_rule(cls) -> Union[bool, Set[Type[ComputeFrameWork]]]:
+    def compute_framework_rule(cls) -> Union[bool, Set[Type[ComputeFramework]]]:
         """Specify that this feature group works with Pandas."""
         return {PandasDataFrame}

mloda_plugins/feature_group/experimental/sklearn/sklearn_artifact.py CHANGED Viewed

@@ -4,14 +4,13 @@ Artifact for storing fitted scikit-learn transformers and estimators.
 import json
 import base64
-import os
 import hashlib
 import tempfile
 from pathlib import Path
 from typing import Any, Dict, Optional
-from mloda_core.abstract_plugins.components.base_artifact import BaseArtifact
-from mloda_core.abstract_plugins.components.feature_set import FeatureSet
+from mloda.provider import BaseArtifact
+from mloda.provider import FeatureSet
 class SklearnArtifact(BaseArtifact):

mloda_plugins/feature_group/experimental/source_input_feature.py CHANGED Viewed

@@ -39,16 +39,16 @@ Further, it allows defining:
 """
 from typing import Any, Dict, NamedTuple, Optional, Set, Tuple, Type, Union
-from mloda_core.abstract_plugins.abstract_feature_group import AbstractFeatureGroup
-from mloda_core.abstract_plugins.components.feature import Feature
-from mloda_core.abstract_plugins.components.feature_name import FeatureName
-from mloda_core.abstract_plugins.components.index.index import Index
-from mloda_core.abstract_plugins.components.link import JoinType, Link, JoinSpec
-from mloda_core.abstract_plugins.components.options import Options
+from mloda import FeatureGroup
+from mloda import Feature
+from mloda.user import FeatureName
+from mloda.user import Index
+from mloda.user import JoinType, Link, JoinSpec
+from mloda import Options
 from mloda_plugins.feature_group.experimental.default_options_key import DefaultOptionKeys
-class SourceInputFeature(AbstractFeatureGroup):
+class SourceInputFeature(FeatureGroup):
     """
     This feature group focuses on defining input features, especially when they originate
     from other sources or require joins/merges.
@@ -100,19 +100,19 @@ class SourceTuple(NamedTuple):
     Attributes:
         feature_name: The name of the feature.
-        source_class: (Optional) The source class of the feature, can be an `AbstractFeatureGroup` class or a `str` representing a scope.
+        source_class: (Optional) The source class of the feature, can be an `FeatureGroup` class or a `str` representing a scope.
         source_value: (Optional) The value associated with the source class, if applicable.
-        left_link: (Optional)  A tuple containing the left-side `AbstractFeatureGroup` class and index for join operations.
-        right_link: (Optional) A tuple containing the right-side `AbstractFeatureGroup` class and index for join operations.
+        left_link: (Optional)  A tuple containing the left-side `FeatureGroup` class and index for join operations.
+        right_link: (Optional) A tuple containing the right-side `FeatureGroup` class and index for join operations.
         join_type: (Optional) The type of join operation (`JoinType`).
         merge_index: (Optional) The index to use for merge operations.
     """
     feature_name: str
-    source_class: Optional[Type[Union[AbstractFeatureGroup, str]]] = None
+    source_class: Optional[Type[Union[FeatureGroup, str]]] = None
     source_value: Optional[str] = None
-    left_link: Optional[Tuple[Type[AbstractFeatureGroup], Union[str, Index]]] = None
-    right_link: Optional[Tuple[Type[AbstractFeatureGroup], Union[str, Index]]] = None
+    left_link: Optional[Tuple[Type[FeatureGroup], Union[str, Index]]] = None
+    right_link: Optional[Tuple[Type[FeatureGroup], Union[str, Index]]] = None
     join_type: Optional[JoinType] = None
     merge_index: Optional[Union[str, Index]] = None
@@ -207,8 +207,8 @@ class SourceInputFeatureComposite:
     @classmethod
     def _handle_link(
         cls,
-        left_link: Tuple[Type[AbstractFeatureGroup], Union[str, Index]],
-        right_link: Tuple[Type[AbstractFeatureGroup], Union[str, Index]],
+        left_link: Tuple[Type[FeatureGroup], Union[str, Index]],
+        right_link: Tuple[Type[FeatureGroup], Union[str, Index]],
         join_type: Any,
     ) -> Link:
         """

mloda_plugins/feature_group/experimental/text_cleaning/base.py CHANGED Viewed

@@ -4,18 +4,19 @@ Base implementation for text cleaning feature groups.
 from __future__ import annotations
-from typing import Any, Optional, Set, Union
-from mloda_core.abstract_plugins.abstract_feature_group import AbstractFeatureGroup
-from mloda_core.abstract_plugins.components.feature import Feature
-from mloda_core.abstract_plugins.components.feature_chainer.feature_chain_parser import FeatureChainParser
-from mloda_core.abstract_plugins.components.feature_name import FeatureName
-from mloda_core.abstract_plugins.components.feature_set import FeatureSet
-from mloda_core.abstract_plugins.components.options import Options
+from typing import Any, Optional
+from mloda import FeatureGroup
+from mloda import Feature
+from mloda.provider import FeatureChainParser
+from mloda.provider import (
+    FeatureChainParserMixin,
+)
+from mloda.provider import FeatureSet
 from mloda_plugins.feature_group.experimental.default_options_key import DefaultOptionKeys
-class TextCleaningFeatureGroup(AbstractFeatureGroup):
+class TextCleaningFeatureGroup(FeatureChainParserMixin, FeatureGroup):
     # Option key for the list of operations
     CLEANING_OPERATIONS = "cleaning_operations"
@@ -33,13 +34,17 @@ class TextCleaningFeatureGroup(AbstractFeatureGroup):
     PATTERN = "__"
     PREFIX_PATTERN = r".*__cleaned_text$"
+    # In-feature configuration for FeatureChainParserMixin
+    MIN_IN_FEATURES = 1
+    MAX_IN_FEATURES = 1
     # Property mapping for configuration-based features
     PROPERTY_MAPPING = {
         CLEANING_OPERATIONS: {
             **SUPPORTED_OPERATIONS,  # All supported operations as valid options
-            DefaultOptionKeys.mloda_context: True,  # Mark as context parameter
-            DefaultOptionKeys.mloda_strict_validation: True,  # Enable strict validation
-            DefaultOptionKeys.mloda_validation_function: lambda operations: (
+            DefaultOptionKeys.context: True,  # Mark as context parameter
+            DefaultOptionKeys.strict_validation: True,  # Enable strict validation
+            DefaultOptionKeys.validation_function: lambda operations: (
                 # Handle both actual tuples/lists and string representations
                 (
                     isinstance(operations, (tuple, list))
@@ -59,7 +64,7 @@ class TextCleaningFeatureGroup(AbstractFeatureGroup):
         },
         DefaultOptionKeys.in_features: {
             "explanation": "Source feature to apply text cleaning operations to",
-            DefaultOptionKeys.mloda_context: True,
+            DefaultOptionKeys.context: True,
         },
     }
@@ -115,41 +120,6 @@ class TextCleaningFeatureGroup(AbstractFeatureGroup):
     - The source feature must contain text data
     """
-    def input_features(self, options: Options, feature_name: FeatureName) -> Optional[Set[Feature]]:
-        """Extract source feature from either configuration-based options or string parsing."""
-        source_feature: str | None = None
-        # Try string-based parsing first
-        _, source_feature = FeatureChainParser.parse_feature_name(feature_name, [self.PREFIX_PATTERN])
-        if source_feature is not None:
-            return {Feature(source_feature)}
-        # Fall back to configuration-based approach
-        source_features = options.get_in_features()
-        if len(source_features) != 1:
-            raise ValueError(
-                f"Expected exactly one source feature, but found {len(source_features)}: {source_features}"
-            )
-        return set(source_features)
-    @classmethod
-    def match_feature_group_criteria(
-        cls,
-        feature_name: Union[FeatureName, str],
-        options: Options,
-        data_access_collection: Optional[Any] = None,
-    ) -> bool:
-        """Check if feature name matches the expected pattern for text cleaning features."""
-        # Use the unified parser with property mapping for full configuration support
-        return FeatureChainParser.match_configuration_feature_chain_parser(
-            feature_name,
-            options,
-            property_mapping=cls.PROPERTY_MAPPING,
-            prefix_patterns=[cls.PREFIX_PATTERN],
-        )
     @classmethod
     def _extract_operations_and_source_feature(cls, feature: Feature) -> tuple[tuple[Any, Any], str]:
         """
@@ -166,31 +136,36 @@ class TextCleaningFeatureGroup(AbstractFeatureGroup):
         Raises:
             ValueError: If parameters cannot be extracted
         """
-        operations = None
-        source_feature_name: str | None = None
+        source_features = cls._extract_source_features(feature)
+        operations = cls._extract_cleaning_operations(feature)
+        if operations is None:
+            raise ValueError(f"Could not extract operations from: {feature.name}")
+        return operations, source_features[0]
+    @classmethod
+    def _extract_cleaning_operations(cls, feature: Feature) -> Optional[tuple[Any, Any]]:
+        """
+        Extract cleaning operations from a feature.
+        Tries string-based parsing first, falls back to configuration-based approach.
+        Args:
+            feature: The feature to extract operations from
+        Returns:
+            Tuple of cleaning operations, or None if not found
+        """
         # Try string-based parsing first
         feature_name_str = feature.name.name if hasattr(feature.name, "name") else str(feature.name)
         if FeatureChainParser.is_chained_feature(feature_name_str):
-            _, source_feature_name = FeatureChainParser.parse_feature_name(feature_name_str, [cls.PREFIX_PATTERN])
             # For string-based features, get operations from options
             operations = feature.options.get(cls.CLEANING_OPERATIONS) or ()
-            if source_feature_name is None:
-                raise ValueError(f"Could not extract source feature from string-based feature: {feature.name}")
-            return operations, source_feature_name  # type: ignore
+            return operations  # type: ignore
         # Fall back to configuration-based approach
-        source_features = feature.options.get_in_features()
-        source_feature = next(iter(source_features))
-        source_feature_name = source_feature.get_name()
         operations = feature.options.get(cls.CLEANING_OPERATIONS)
-        if operations is None or source_feature_name is None:
-            raise ValueError(f"Could not extract cleaning operations and source feature from: {feature.name}")
-        return operations, source_feature_name
+        return operations if operations is not None else None
     @classmethod
     def calculate_feature(cls, data: Any, features: FeatureSet) -> Any:

mloda_plugins/feature_group/experimental/text_cleaning/pandas.py CHANGED Viewed

@@ -25,7 +25,7 @@ except ImportError:
     pd = None
-from mloda_core.abstract_plugins.compute_frame_work import ComputeFrameWork
+from mloda import ComputeFramework
 from mloda_plugins.compute_framework.base_implementations.pandas.dataframe import PandasDataFrame
 from mloda_plugins.feature_group.experimental.text_cleaning.base import TextCleaningFeatureGroup
@@ -40,7 +40,7 @@ class PandasTextCleaningFeatureGroup(TextCleaningFeatureGroup):
     """
     @classmethod
-    def compute_framework_rule(cls) -> set[type[ComputeFrameWork]]:
+    def compute_framework_rule(cls) -> set[type[ComputeFramework]]:
         """Define the compute framework for this feature group."""
         return {PandasDataFrame}

mloda_plugins/feature_group/experimental/text_cleaning/python_dict.py CHANGED Viewed

@@ -9,7 +9,7 @@ import string
 import unicodedata
 from typing import Any, Dict, List, Set, Type, Union
-from mloda_core.abstract_plugins.compute_frame_work import ComputeFrameWork
+from mloda import ComputeFramework
 from mloda_plugins.compute_framework.base_implementations.python_dict.python_dict_framework import PythonDictFramework
 from mloda_plugins.feature_group.experimental.text_cleaning.base import TextCleaningFeatureGroup
@@ -35,7 +35,7 @@ class PythonDictTextCleaningFeatureGroup(TextCleaningFeatureGroup):
     """
     @classmethod
-    def compute_framework_rule(cls) -> Union[bool, Set[Type[ComputeFrameWork]]]:
+    def compute_framework_rule(cls) -> Union[bool, Set[Type[ComputeFramework]]]:
         return {PythonDictFramework}
     @classmethod

mloda 0.3.3__py3-none-any.whl → 0.4.1__py3-none-any.whl

mloda 0.3.3__py3-none-any.whl → 0.4.1__py3-none-any.whl