PyPI - featcopilot - Versions diffs - 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl - Mend

featcopilot 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29) hide show

featcopilot/__init__.py +3 -1
featcopilot/core/feature.py +5 -1
featcopilot/engines/relational.py +5 -2
featcopilot/engines/tabular.py +6 -3
featcopilot/engines/text.py +6 -3
featcopilot/engines/timeseries.py +5 -2
featcopilot/llm/__init__.py +4 -1
featcopilot/llm/code_generator.py +7 -4
featcopilot/llm/copilot_client.py +67 -23
featcopilot/llm/explainer.py +6 -3
featcopilot/llm/litellm_client.py +595 -0
featcopilot/llm/semantic_engine.py +65 -16
featcopilot/selection/importance.py +5 -2
featcopilot/selection/redundancy.py +6 -3
featcopilot/selection/statistical.py +4 -1
featcopilot/selection/unified.py +4 -1
featcopilot/stores/__init__.py +15 -0
featcopilot/stores/base.py +166 -0
featcopilot/stores/feast_store.py +541 -0
featcopilot/transformers/sklearn_compat.py +8 -5
featcopilot/utils/__init__.py +14 -0
featcopilot/utils/logger.py +47 -0
featcopilot/utils/models.py +287 -0
featcopilot/utils/parallel.py +5 -1
{featcopilot-0.1.0.dist-info → featcopilot-0.2.0.dist-info}/METADATA +32 -9
featcopilot-0.2.0.dist-info/RECORD +35 -0
featcopilot-0.1.0.dist-info/RECORD +0 -29
{featcopilot-0.1.0.dist-info → featcopilot-0.2.0.dist-info}/WHEEL +0 -0
{featcopilot-0.1.0.dist-info → featcopilot-0.2.0.dist-info}/top_level.txt +0 -0

featcopilot/llm/semantic_engine.py CHANGED Viewed

@@ -3,7 +3,7 @@
 Uses contextual understanding of data to generate meaningful features.
 """
-from typing import Any, Optional, Union
+from typing import Any, Literal, Optional, Union
 import numpy as np
 import pandas as pd
@@ -11,25 +11,30 @@ from pydantic import Field
 from featcopilot.core.base import BaseEngine, EngineConfig
 from featcopilot.core.feature import Feature, FeatureOrigin, FeatureSet, FeatureType
-from featcopilot.llm.copilot_client import SyncCopilotFeatureClient
+from featcopilot.utils.logger import get_logger
+logger = get_logger(__name__)
 class SemanticEngineConfig(EngineConfig):
     """Configuration for semantic feature engine."""
     name: str = "SemanticEngine"
-    model: str = Field(default="gpt-5", description="LLM model to use")
+    model: str = Field(default="gpt-5.2", description="LLM model to use")
     max_suggestions: int = Field(default=20, description="Max features to suggest")
     validate_features: bool = Field(default=True, description="Validate generated code")
     domain: Optional[str] = Field(default=None, description="Domain context")
     temperature: float = Field(default=0.3, description="LLM temperature")
+    backend: Literal["copilot", "litellm"] = Field(default="copilot", description="LLM backend to use")
+    api_key: Optional[str] = Field(default=None, description="API key for litellm backend")
+    api_base: Optional[str] = Field(default=None, description="Custom API base URL for litellm")
 class SemanticEngine(BaseEngine):
     """
     LLM-powered semantic feature engineering engine.
-    Uses GitHub Copilot SDK to:
+    Uses GitHub Copilot SDK or LiteLLM to:
     - Understand column semantics from names and descriptions
     - Generate domain-aware features
     - Create interpretable features with explanations
@@ -39,7 +44,7 @@ class SemanticEngine(BaseEngine):
     Parameters
     ----------
-    model : str, default='gpt-5'
+    model : str, default='gpt-5.2'
         LLM model to use
     max_suggestions : int, default=20
         Maximum number of features to suggest
@@ -47,24 +52,54 @@ class SemanticEngine(BaseEngine):
         Whether to validate generated feature code
     domain : str, optional
         Domain context (e.g., 'healthcare', 'finance', 'retail')
+    backend : str, default='copilot'
+        LLM backend to use: 'copilot' or 'litellm'
+    api_key : str, optional
+        API key for litellm backend (uses environment variable if not provided)
+    api_base : str, optional
+        Custom API base URL for litellm backend (for self-hosted models)
     Examples
     --------
-    >>> engine = SemanticEngine(model='gpt-5', domain='healthcare')
+    Using GitHub Copilot SDK (default):
+    >>> engine = SemanticEngine(model='gpt-5.2', domain='healthcare')
     >>> X_features = engine.fit_transform(
     ...     X, y,
     ...     column_descriptions={'age': 'Patient age', 'bmi': 'Body mass index'},
     ...     task_description='Predict diabetes risk'
     ... )
+    Using LiteLLM with OpenAI:
+    >>> engine = SemanticEngine(
+    ...     model='gpt-4o',
+    ...     backend='litellm',
+    ...     api_key='your-api-key'  # or set OPENAI_API_KEY env var
+    ... )
+    Using LiteLLM with Anthropic:
+    >>> engine = SemanticEngine(
+    ...     model='claude-3-opus',
+    ...     backend='litellm'
+    ... )
+    Using LiteLLM with local Ollama:
+    >>> engine = SemanticEngine(
+    ...     model='ollama/llama2',
+    ...     backend='litellm',
+    ...     api_base='http://localhost:11434'
+    ... )
     """
     def __init__(
         self,
-        model: str = "gpt-5",
+        model: str = "gpt-5.2",
         max_suggestions: int = 20,
         validate_features: bool = True,
         domain: Optional[str] = None,
         verbose: bool = False,
+        backend: Literal["copilot", "litellm"] = "copilot",
+        api_key: Optional[str] = None,
+        api_base: Optional[str] = None,
         **kwargs,
     ):
         config = SemanticEngineConfig(
@@ -73,11 +108,14 @@ class SemanticEngine(BaseEngine):
             validate_features=validate_features,
             domain=domain,
             verbose=verbose,
+            backend=backend,
+            api_key=api_key,
+            api_base=api_base,
             **kwargs,
         )
         super().__init__(config=config)
         self.config: SemanticEngineConfig = config
-        self._client: Optional[SyncCopilotFeatureClient] = None
+        self._client: Optional[Any] = None
         self._suggested_features: list[dict[str, Any]] = []
         self._feature_set = FeatureSet()
         self._column_info: dict[str, str] = {}
@@ -85,9 +123,20 @@ class SemanticEngine(BaseEngine):
         self._task_description: str = ""
     def _ensure_client(self) -> None:
-        """Ensure Copilot client is initialized."""
+        """Ensure LLM client is initialized."""
         if self._client is None:
-            self._client = SyncCopilotFeatureClient(model=self.config.model)
+            if self.config.backend == "litellm":
+                from featcopilot.llm.litellm_client import SyncLiteLLMFeatureClient
+                self._client = SyncLiteLLMFeatureClient(
+                    model=self.config.model,
+                    api_key=self.config.api_key,
+                    api_base=self.config.api_base,
+                )
+            else:
+                from featcopilot.llm.copilot_client import SyncCopilotFeatureClient
+                self._client = SyncCopilotFeatureClient(model=self.config.model)
             self._client.start()
     def fit(
@@ -137,7 +186,7 @@ class SemanticEngine(BaseEngine):
         # Get LLM suggestions
         if self.config.verbose:
-            print("SemanticEngine: Requesting feature suggestions from LLM...")
+            logger.info("SemanticEngine: Requesting feature suggestions from LLM...")
         self._suggested_features = self._client.suggest_features(
             column_info=self._column_info,
@@ -148,7 +197,7 @@ class SemanticEngine(BaseEngine):
         )
         if self.config.verbose:
-            print(f"SemanticEngine: Received {len(self._suggested_features)} suggestions")
+            logger.info(f"SemanticEngine: Received {len(self._suggested_features)} suggestions")
         # Validate features if enabled
         if self.config.validate_features:
@@ -175,14 +224,14 @@ class SemanticEngine(BaseEngine):
             if result["valid"]:
                 valid_features.append(feature)
             elif self.config.verbose:
-                print(
+                logger.warning(
                     f"SemanticEngine: Invalid feature '{feature.get('name', 'unknown')}': {result.get('error', 'unknown error')}"
                 )
         self._suggested_features = valid_features
         if self.config.verbose:
-            print(f"SemanticEngine: {len(valid_features)} valid features after validation")
+            logger.info(f"SemanticEngine: {len(valid_features)} valid features after validation")
     def _build_feature_set(self) -> None:
         """Build FeatureSet from suggestions."""
@@ -266,7 +315,7 @@ class SemanticEngine(BaseEngine):
             except Exception as e:
                 if self.config.verbose:
-                    print(f"SemanticEngine: Error computing '{name}': {e}")
+                    logger.error(f"SemanticEngine: Error computing '{name}': {e}")
         # Handle infinities and NaNs
         result = result.replace([np.inf, -np.inf], np.nan)
@@ -274,7 +323,7 @@ class SemanticEngine(BaseEngine):
         self._feature_names = successful_features
         if self.config.verbose:
-            print(f"SemanticEngine: Successfully generated {len(successful_features)} features")
+            logger.info(f"SemanticEngine: Successfully generated {len(successful_features)} features")
         return result

featcopilot/selection/importance.py CHANGED Viewed

@@ -6,6 +6,9 @@ import numpy as np
 import pandas as pd
 from featcopilot.core.base import BaseSelector
+from featcopilot.utils.logger import get_logger
+logger = get_logger(__name__)
 class ImportanceSelector(BaseSelector):
@@ -119,7 +122,7 @@ class ImportanceSelector(BaseSelector):
                     return xgb.XGBRegressor(n_estimators=self.n_estimators, random_state=42, n_jobs=-1)
             except ImportError:
                 if self.verbose:
-                    print("XGBoost not available, falling back to RandomForest")
+                    logger.warning("XGBoost not available, falling back to RandomForest")
                 return self._create_model_fallback(is_classification)
         else:
@@ -149,7 +152,7 @@ class ImportanceSelector(BaseSelector):
         self._selected_features = [name for name, _ in sorted_features]
         if self.verbose:
-            print(f"ImportanceSelector: Selected {len(self._selected_features)} features")
+            logger.info(f"ImportanceSelector: Selected {len(self._selected_features)} features")
     def transform(self, X: Union[pd.DataFrame, np.ndarray], **kwargs) -> pd.DataFrame:
         """Select features from data."""

featcopilot/selection/redundancy.py CHANGED Viewed

@@ -6,6 +6,9 @@ import numpy as np
 import pandas as pd
 from featcopilot.core.base import BaseSelector
+from featcopilot.utils.logger import get_logger
+logger = get_logger(__name__)
 class RedundancyEliminator(BaseSelector):
@@ -119,11 +122,11 @@ class RedundancyEliminator(BaseSelector):
                     if imp1 >= imp2:
                         to_remove.add(col2)
                         if self.verbose:
-                            print(f"Removing {col2} (corr={corr:.3f} with {col1})")
+                            logger.info(f"Removing {col2} (corr={corr:.3f} with {col1})")
                     else:
                         to_remove.add(col1)
                         if self.verbose:
-                            print(f"Removing {col1} (corr={corr:.3f} with {col2})")
+                            logger.info(f"Removing {col1} (corr={corr:.3f} with {col2})")
                         break  # col1 is removed, move to next
         # Selected features are those not removed
@@ -131,7 +134,7 @@ class RedundancyEliminator(BaseSelector):
         self._removed_features = list(to_remove)
         if self.verbose:
-            print(f"RedundancyEliminator: Removed {len(to_remove)} redundant features")
+            logger.info(f"RedundancyEliminator: Removed {len(to_remove)} redundant features")
     def transform(self, X: Union[pd.DataFrame, np.ndarray], **kwargs) -> pd.DataFrame:
         """Remove redundant features."""

featcopilot/selection/statistical.py CHANGED Viewed

@@ -6,6 +6,9 @@ import numpy as np
 import pandas as pd
 from featcopilot.core.base import BaseSelector
+from featcopilot.utils.logger import get_logger
+logger = get_logger(__name__)
 class StatisticalSelector(BaseSelector):
@@ -173,7 +176,7 @@ class StatisticalSelector(BaseSelector):
         self._selected_features = [name for name, _ in sorted_features]
         if self.verbose:
-            print(f"StatisticalSelector: Selected {len(self._selected_features)} features")
+            logger.info(f"StatisticalSelector: Selected {len(self._selected_features)} features")
     def transform(self, X: Union[pd.DataFrame, np.ndarray], **kwargs) -> pd.DataFrame:
         """

featcopilot/selection/unified.py CHANGED Viewed

@@ -9,6 +9,9 @@ from featcopilot.core.base import BaseSelector
 from featcopilot.selection.importance import ImportanceSelector
 from featcopilot.selection.redundancy import RedundancyEliminator
 from featcopilot.selection.statistical import StatisticalSelector
+from featcopilot.utils.logger import get_logger
+logger = get_logger(__name__)
 class FeatureSelector(BaseSelector):
@@ -152,7 +155,7 @@ class FeatureSelector(BaseSelector):
         self._selected_features = [name for name, _ in sorted_features]
         if self.verbose:
-            print(f"FeatureSelector: Selected {len(self._selected_features)} features")
+            logger.info(f"FeatureSelector: Selected {len(self._selected_features)} features")
     def transform(self, X: Union[pd.DataFrame, np.ndarray], **kwargs) -> pd.DataFrame:
         """Select features from data."""

featcopilot/stores/__init__.py ADDED Viewed

@@ -0,0 +1,15 @@
+"""Feature store integrations for FeatCopilot.
+Provides interfaces to save and retrieve engineered features
+from popular feature stores like Feast, enabling feature reuse
+and serving in production ML systems.
+"""
+from featcopilot.stores.base import BaseFeatureStore, FeatureStoreConfig
+from featcopilot.stores.feast_store import FeastFeatureStore
+__all__ = [
+    "BaseFeatureStore",
+    "FeatureStoreConfig",
+    "FeastFeatureStore",
+]

featcopilot/stores/base.py ADDED Viewed

@@ -0,0 +1,166 @@
+"""Base classes for feature store integrations."""
+from abc import ABC, abstractmethod
+from typing import Any, Optional
+import pandas as pd
+from pydantic import BaseModel, Field
+from featcopilot.core.feature import FeatureSet
+class FeatureStoreConfig(BaseModel):
+    """Base configuration for feature stores."""
+    name: str = Field(description="Feature store name")
+    entity_columns: list[str] = Field(default_factory=list, description="Entity/key columns")
+    timestamp_column: Optional[str] = Field(default=None, description="Event timestamp column")
+    feature_prefix: str = Field(default="", description="Prefix for feature names")
+    tags: dict[str, str] = Field(default_factory=dict, description="Tags/labels for features")
+class BaseFeatureStore(ABC):
+    """
+    Abstract base class for feature store integrations.
+    Provides a unified interface for saving and retrieving
+    engineered features from various feature stores.
+    Parameters
+    ----------
+    config : FeatureStoreConfig
+        Configuration for the feature store
+    Examples
+    --------
+    >>> store = ConcreteFeatureStore(config)
+    >>> store.save_features(X_transformed, feature_set, feature_view_name='my_features')
+    >>> features = store.get_features(entity_df, feature_names=['feat1', 'feat2'])
+    """
+    def __init__(self, config: FeatureStoreConfig):
+        self.config = config
+        self._is_initialized = False
+    @abstractmethod
+    def initialize(self) -> None:
+        """
+        Initialize connection to the feature store.
+        This should be called before any other operations.
+        """
+        pass
+    @abstractmethod
+    def save_features(
+        self,
+        df: pd.DataFrame,
+        feature_set: Optional[FeatureSet] = None,
+        feature_view_name: str = "featcopilot_features",
+        description: Optional[str] = None,
+        **kwargs,
+    ) -> None:
+        """
+        Save features to the feature store.
+        Parameters
+        ----------
+        df : DataFrame
+            DataFrame containing features to save
+        feature_set : FeatureSet, optional
+            FeatCopilot FeatureSet with metadata
+        feature_view_name : str
+            Name for the feature view/table
+        description : str, optional
+            Description of the feature view
+        **kwargs
+            Additional store-specific options
+        """
+        pass
+    @abstractmethod
+    def get_features(
+        self,
+        entity_df: pd.DataFrame,
+        feature_names: list[str],
+        feature_view_name: str = "featcopilot_features",
+        **kwargs,
+    ) -> pd.DataFrame:
+        """
+        Retrieve features from the feature store.
+        Parameters
+        ----------
+        entity_df : DataFrame
+            DataFrame with entity keys and timestamps
+        feature_names : list
+            Names of features to retrieve
+        feature_view_name : str
+            Name of the feature view/table
+        **kwargs
+            Additional store-specific options
+        Returns
+        -------
+        DataFrame
+            DataFrame with requested features
+        """
+        pass
+    @abstractmethod
+    def list_feature_views(self) -> list[str]:
+        """
+        List all feature views in the store.
+        Returns
+        -------
+        list
+            Names of feature views
+        """
+        pass
+    @abstractmethod
+    def get_feature_view_schema(self, feature_view_name: str) -> dict[str, Any]:
+        """
+        Get schema/metadata for a feature view.
+        Parameters
+        ----------
+        feature_view_name : str
+            Name of the feature view
+        Returns
+        -------
+        dict
+            Schema information
+        """
+        pass
+    @abstractmethod
+    def delete_feature_view(self, feature_view_name: str) -> bool:
+        """
+        Delete a feature view.
+        Parameters
+        ----------
+        feature_view_name : str
+            Name of the feature view to delete
+        Returns
+        -------
+        bool
+            Whether deletion was successful
+        """
+        pass
+    def close(self) -> None:
+        """Close connection to the feature store."""
+        self._is_initialized = False
+    def __enter__(self):
+        self.initialize()
+        return self
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        self.close()
+        return False

featcopilot 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl

featcopilot 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl