PyPI - featcopilot - Versions diffs - 0.1.0__tar.gz → 0.2.0__tar.gz - Mend

featcopilot 0.1.0.tar.gz → 0.2.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (47) hide show

{featcopilot-0.1.0 → featcopilot-0.2.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: featcopilot
-Version: 0.1.0
+Version: 0.2.0
 Summary: Next-generation LLM-powered auto feature engineering framework with GitHub Copilot SDK
 Author: FeatCopilot Contributors
 License: MIT
@@ -28,11 +28,26 @@ Requires-Dist: pydantic>=2.0.0
 Requires-Dist: joblib>=1.1.0
 Provides-Extra: llm
 Requires-Dist: github-copilot-sdk>=0.1.0; extra == "llm"
+Requires-Dist: nest_asyncio>=1.5.0; extra == "llm"
+Provides-Extra: litellm
+Requires-Dist: litellm>=1.0.0; extra == "litellm"
+Requires-Dist: nest_asyncio>=1.5.0; extra == "litellm"
 Provides-Extra: timeseries
 Requires-Dist: statsmodels>=0.13.0; extra == "timeseries"
+Provides-Extra: feast
+Requires-Dist: feast>=0.30.0; extra == "feast"
 Provides-Extra: full
 Requires-Dist: github-copilot-sdk>=0.1.0; extra == "full"
+Requires-Dist: litellm>=1.0.0; extra == "full"
 Requires-Dist: statsmodels>=0.13.0; extra == "full"
+Requires-Dist: feast>=0.30.0; extra == "full"
+Requires-Dist: nest_asyncio>=1.5.0; extra == "full"
+Provides-Extra: benchmark
+Requires-Dist: github-copilot-sdk>=0.1.0; extra == "benchmark"
+Requires-Dist: statsmodels>=0.13.0; extra == "benchmark"
+Requires-Dist: flaml[automl,blendsearch]>=2.0.0; extra == "benchmark"
+Requires-Dist: autogluon.tabular>=1.0.0; extra == "benchmark"
+Requires-Dist: h2o>=3.40.0; extra == "benchmark"
 Provides-Extra: dev
 Requires-Dist: pytest>=7.0.0; extra == "dev"
 Requires-Dist: pytest-asyncio>=0.21.0; extra == "dev"
@@ -44,9 +59,9 @@ Requires-Dist: pre-commit>=3.6.0; extra == "dev"
 # FeatCopilot 🚀
-**Next-Generation LLM-Powered Auto Feature Engineering with GitHub Copilot SDK**
+**Next-Generation LLM-Powered Auto Feature Engineering Framework**
-FeatCopilot is a unified feature engineering framework that combines the best approaches from existing libraries (Featuretools, TSFresh, AutoFeat, OpenFE) with novel LLM-powered capabilities via GitHub Copilot SDK.
+FeatCopilot automatically generates, selects, and explains predictive features using semantic understanding. It analyzes column meanings, applies domain-aware transformations, and provides human-readable explanations—turning raw data into ML-ready features in seconds.
 ## 📊 Benchmark Highlights
@@ -59,7 +74,7 @@ FeatCopilot is a unified feature engineering framework that combines the best ap
 | Classification | +0.54% | +4.35% |
 | Regression | +0.65% | +5.57% |
-### LLM Engine (With Copilot - 30-60s)
+### LLM Engine (With LiteLLM - 30-60s)
 | Task Type | Average Improvement | Best Case |
 |-----------|--------------------:|----------:|
@@ -87,7 +102,7 @@ FeatCopilot is a unified feature engineering framework that combines the best ap
 # Basic installation
 pip install featcopilot
-# With LLM capabilities (requires GitHub Copilot)
+# With LLM capabilities
 pip install featcopilot[llm]
 # Full installation
@@ -111,7 +126,7 @@ X_transformed = engineer.fit_transform(X, y)  # <1 second
 print(f"Features: {X.shape[1]} -> {X_transformed.shape[1]}")
 ```
-### LLM Mode (With Copilot)
+### LLM Mode (With LiteLLM)
 ```python
 from featcopilot import AutoFeatureEngineer
@@ -164,16 +179,24 @@ engine = TimeSeriesEngine(
 ```
 ### LLM Engine
-Uses GitHub Copilot SDK for intelligent feature generation.
+Uses GitHub Copilot SDK (default) or LiteLLM (100+ providers) for intelligent feature generation.
 ```python
 from featcopilot.llm import SemanticEngine
+# Default: GitHub Copilot SDK
 engine = SemanticEngine(
-    model='gpt-5',
+    model='gpt-5.2',
     max_suggestions=20,
     validate_features=True
 )
+# Alternative: LiteLLM backend
+engine = SemanticEngine(
+    model='gpt-4o',
+    backend='litellm',
+    max_suggestions=20
+)
 ```
 ## Feature Selection
@@ -211,7 +234,7 @@ X_selected = selector.fit_transform(X, y)
 - Python 3.9+
 - NumPy, Pandas, Scikit-learn
-- GitHub Copilot CLI (for LLM features)
+- GitHub Copilot SDK (default) or LiteLLM (for 100+ LLM providers)
 ## License

{featcopilot-0.1.0 → featcopilot-0.2.0}/README.md RENAMED Viewed

@@ -1,8 +1,8 @@
 # FeatCopilot 🚀
-**Next-Generation LLM-Powered Auto Feature Engineering with GitHub Copilot SDK**
+**Next-Generation LLM-Powered Auto Feature Engineering Framework**
-FeatCopilot is a unified feature engineering framework that combines the best approaches from existing libraries (Featuretools, TSFresh, AutoFeat, OpenFE) with novel LLM-powered capabilities via GitHub Copilot SDK.
+FeatCopilot automatically generates, selects, and explains predictive features using semantic understanding. It analyzes column meanings, applies domain-aware transformations, and provides human-readable explanations—turning raw data into ML-ready features in seconds.
 ## 📊 Benchmark Highlights
@@ -15,7 +15,7 @@ FeatCopilot is a unified feature engineering framework that combines the best ap
 | Classification | +0.54% | +4.35% |
 | Regression | +0.65% | +5.57% |
-### LLM Engine (With Copilot - 30-60s)
+### LLM Engine (With LiteLLM - 30-60s)
 | Task Type | Average Improvement | Best Case |
 |-----------|--------------------:|----------:|
@@ -43,7 +43,7 @@ FeatCopilot is a unified feature engineering framework that combines the best ap
 # Basic installation
 pip install featcopilot
-# With LLM capabilities (requires GitHub Copilot)
+# With LLM capabilities
 pip install featcopilot[llm]
 # Full installation
@@ -67,7 +67,7 @@ X_transformed = engineer.fit_transform(X, y)  # <1 second
 print(f"Features: {X.shape[1]} -> {X_transformed.shape[1]}")
 ```
-### LLM Mode (With Copilot)
+### LLM Mode (With LiteLLM)
 ```python
 from featcopilot import AutoFeatureEngineer
@@ -120,16 +120,24 @@ engine = TimeSeriesEngine(
 ```
 ### LLM Engine
-Uses GitHub Copilot SDK for intelligent feature generation.
+Uses GitHub Copilot SDK (default) or LiteLLM (100+ providers) for intelligent feature generation.
 ```python
 from featcopilot.llm import SemanticEngine
+# Default: GitHub Copilot SDK
 engine = SemanticEngine(
-    model='gpt-5',
+    model='gpt-5.2',
     max_suggestions=20,
     validate_features=True
 )
+# Alternative: LiteLLM backend
+engine = SemanticEngine(
+    model='gpt-4o',
+    backend='litellm',
+    max_suggestions=20
+)
 ```
 ## Feature Selection
@@ -167,7 +175,7 @@ X_selected = selector.fit_transform(X, y)
 - Python 3.9+
 - NumPy, Pandas, Scikit-learn
-- GitHub Copilot CLI (for LLM features)
+- GitHub Copilot SDK (default) or LiteLLM (for 100+ LLM providers)
 ## License

{featcopilot-0.1.0 → featcopilot-0.2.0}/featcopilot/__init__.py RENAMED Viewed

@@ -5,7 +5,9 @@ A unified feature engineering framework combining traditional approaches
 with novel LLM-powered capabilities via GitHub Copilot SDK.
 """
-__version__ = "0.1.0"
+from importlib.metadata import version
+__version__ = version("featcopilot")
 __author__ = "FeatCopilot Contributors"
 from featcopilot.core.base import BaseEngine, BaseSelector

{featcopilot-0.1.0 → featcopilot-0.2.0}/featcopilot/core/feature.py RENAMED Viewed

@@ -7,6 +7,10 @@ from typing import Any, Optional
 import numpy as np
 import pandas as pd
+from featcopilot.utils.logger import get_logger
+logger = get_logger(__name__)
 class FeatureType(Enum):
     """Types of features."""
@@ -220,5 +224,5 @@ class FeatureSet:
                     result[feature.name] = feature.compute(df)
                 except Exception as e:
                     # Log warning but continue
-                    print(f"Warning: Could not compute feature {feature.name}: {e}")
+                    logger.warning(f"Could not compute feature {feature.name}: {e}")
         return result

{featcopilot-0.1.0 → featcopilot-0.2.0}/featcopilot/engines/relational.py RENAMED Viewed

@@ -11,6 +11,9 @@ from pydantic import Field
 from featcopilot.core.base import BaseEngine, EngineConfig
 from featcopilot.core.feature import FeatureSet
+from featcopilot.utils.logger import get_logger
+logger = get_logger(__name__)
 class RelationalEngineConfig(EngineConfig):
@@ -141,7 +144,7 @@ class RelationalEngine(BaseEngine):
         self._primary_columns = X.columns.tolist()
         if self.config.verbose:
-            print(f"RelationalEngine: {len(self._relationships)} relationships defined")
+            logger.info(f"RelationalEngine: {len(self._relationships)} relationships defined")
         self._is_fitted = True
         return self
@@ -191,7 +194,7 @@ class RelationalEngine(BaseEngine):
         self._feature_names = [c for c in result.columns if c not in X.columns]
         if self.config.verbose:
-            print(f"RelationalEngine: Generated {len(self._feature_names)} features")
+            logger.info(f"RelationalEngine: Generated {len(self._feature_names)} features")
         return result

{featcopilot-0.1.0 → featcopilot-0.2.0}/featcopilot/engines/tabular.py RENAMED Viewed

@@ -12,6 +12,9 @@ from pydantic import Field
 from featcopilot.core.base import BaseEngine, EngineConfig
 from featcopilot.core.feature import Feature, FeatureOrigin, FeatureSet, FeatureType
+from featcopilot.utils.logger import get_logger
+logger = get_logger(__name__)
 class TabularEngineConfig(EngineConfig):
@@ -124,7 +127,7 @@ class TabularEngine(BaseEngine):
         ]
         if self.config.verbose:
-            print(f"TabularEngine: Found {len(self._numeric_columns)} numeric columns")
+            logger.info(f"TabularEngine: Found {len(self._numeric_columns)} numeric columns")
         # Plan features to generate
         self._plan_features(X)
@@ -207,7 +210,7 @@ class TabularEngine(BaseEngine):
             self._feature_set.add(feature)
         if self.config.verbose:
-            print(f"TabularEngine: Planned {len(self._feature_set)} features")
+            logger.info(f"TabularEngine: Planned {len(self._feature_set)} features")
     def transform(self, X: Union[pd.DataFrame, np.ndarray], **kwargs) -> pd.DataFrame:
         """
@@ -284,7 +287,7 @@ class TabularEngine(BaseEngine):
         self._feature_names = [c for c in result.columns if c not in X.columns]
         if self.config.verbose:
-            print(f"TabularEngine: Generated {len(self._feature_names)} features")
+            logger.info(f"TabularEngine: Generated {len(self._feature_names)} features")
         return result

{featcopilot-0.1.0 → featcopilot-0.2.0}/featcopilot/engines/text.py RENAMED Viewed

@@ -11,6 +11,9 @@ from pydantic import Field
 from featcopilot.core.base import BaseEngine, EngineConfig
 from featcopilot.core.feature import FeatureSet
+from featcopilot.utils.logger import get_logger
+logger = get_logger(__name__)
 class TextEngineConfig(EngineConfig):
@@ -106,7 +109,7 @@ class TextEngine(BaseEngine):
             ]
         if self.config.verbose:
-            print(f"TextEngine: Found {len(self._text_columns)} text columns")
+            logger.info(f"TextEngine: Found {len(self._text_columns)} text columns")
         # Fit TF-IDF vectorizers if needed
         if "tfidf" in self.config.features:
@@ -135,7 +138,7 @@ class TextEngine(BaseEngine):
         except ImportError:
             if self.config.verbose:
-                print("TextEngine: sklearn not available for TF-IDF, skipping")
+                logger.warning("TextEngine: sklearn not available for TF-IDF, skipping")
     def transform(self, X: Union[pd.DataFrame, np.ndarray], **kwargs) -> pd.DataFrame:
         """
@@ -191,7 +194,7 @@ class TextEngine(BaseEngine):
         self._feature_names = [c for c in result.columns if c not in X.columns]
         if self.config.verbose:
-            print(f"TextEngine: Extracted {len(self._feature_names)} features")
+            logger.info(f"TextEngine: Extracted {len(self._feature_names)} features")
         return result

{featcopilot-0.1.0 → featcopilot-0.2.0}/featcopilot/engines/timeseries.py RENAMED Viewed

@@ -12,6 +12,9 @@ from pydantic import Field
 from featcopilot.core.base import BaseEngine, EngineConfig
 from featcopilot.core.feature import FeatureSet
+from featcopilot.utils.logger import get_logger
+logger = get_logger(__name__)
 class TimeSeriesEngineConfig(EngineConfig):
@@ -123,7 +126,7 @@ class TimeSeriesEngine(BaseEngine):
         self._time_columns = X.select_dtypes(include=[np.number]).columns.tolist()
         if self.config.verbose:
-            print(f"TimeSeriesEngine: Found {len(self._time_columns)} numeric columns")
+            logger.info(f"TimeSeriesEngine: Found {len(self._time_columns)} numeric columns")
         self._is_fitted = True
         return self
@@ -177,7 +180,7 @@ class TimeSeriesEngine(BaseEngine):
         self._feature_names = list(result.columns)
         if self.config.verbose:
-            print(f"TimeSeriesEngine: Extracted {len(self._feature_names)} features")
+            logger.info(f"TimeSeriesEngine: Extracted {len(self._feature_names)} features")
         return result

{featcopilot-0.1.0 → featcopilot-0.2.0}/featcopilot/llm/__init__.py RENAMED Viewed

@@ -1,15 +1,18 @@
 """LLM-powered feature engineering module.
-Uses GitHub Copilot SDK for intelligent feature generation.
+Uses GitHub Copilot SDK or LiteLLM for intelligent feature generation.
 """
 from featcopilot.llm.code_generator import FeatureCodeGenerator
 from featcopilot.llm.copilot_client import CopilotFeatureClient
 from featcopilot.llm.explainer import FeatureExplainer
+from featcopilot.llm.litellm_client import LiteLLMFeatureClient, SyncLiteLLMFeatureClient
 from featcopilot.llm.semantic_engine import SemanticEngine
 __all__ = [
     "CopilotFeatureClient",
+    "LiteLLMFeatureClient",
+    "SyncLiteLLMFeatureClient",
     "SemanticEngine",
     "FeatureExplainer",
     "FeatureCodeGenerator",

{featcopilot-0.1.0 → featcopilot-0.2.0}/featcopilot/llm/code_generator.py RENAMED Viewed

@@ -10,6 +10,9 @@ import pandas as pd
 from featcopilot.core.feature import Feature, FeatureOrigin, FeatureType
 from featcopilot.llm.copilot_client import SyncCopilotFeatureClient
+from featcopilot.utils.logger import get_logger
+logger = get_logger(__name__)
 class FeatureCodeGenerator:
@@ -21,7 +24,7 @@ class FeatureCodeGenerator:
     Parameters
     ----------
-    model : str, default='gpt-5'
+    model : str, default='gpt-5.2'
         LLM model to use
     validate : bool, default=True
         Whether to validate generated code
@@ -35,7 +38,7 @@ class FeatureCodeGenerator:
     ... )
     """
-    def __init__(self, model: str = "gpt-5", validate: bool = True, verbose: bool = False):
+    def __init__(self, model: str = "gpt-5.2", validate: bool = True, verbose: bool = False):
         self.model = model
         self.validate = validate
         self.verbose = verbose
@@ -98,7 +101,7 @@ class FeatureCodeGenerator:
             )
             if not validation["valid"]:
                 if self.verbose:
-                    print(f"Code validation failed: {validation['error']}")
+                    logger.warning(f"Code validation failed: {validation['error']}")
                 # Try to fix common issues
                 code = self._fix_common_issues(code, validation["error"])
@@ -144,7 +147,7 @@ class FeatureCodeGenerator:
                 features.append(feature)
             except Exception as e:
                 if self.verbose:
-                    print(f"Failed to generate feature for '{desc}': {e}")
+                    logger.error(f"Failed to generate feature for '{desc}': {e}")
         return features

{featcopilot-0.1.0 → featcopilot-0.2.0}/featcopilot/llm/copilot_client.py RENAMED Viewed

@@ -10,11 +10,15 @@ from typing import Any, Optional
 from pydantic import BaseModel, Field
+from featcopilot.utils.logger import get_logger
+logger = get_logger(__name__)
 class CopilotConfig(BaseModel):
     """Configuration for Copilot client."""
-    model: str = Field(default="gpt-5", description="Model to use")
+    model: str = Field(default="gpt-5.2", description="Model to use")
     temperature: float = Field(default=0.3, ge=0, le=1, description="Temperature for generation")
     max_tokens: int = Field(default=4096, description="Maximum tokens in response")
     timeout: float = Field(default=60.0, description="Timeout in seconds")
@@ -35,12 +39,12 @@ class CopilotFeatureClient:
     ----------
     config : CopilotConfig, optional
         Configuration for the client
-    model : str, default='gpt-5'
+    model : str, default='gpt-5.2'
         Model to use for generation
     Examples
     --------
-    >>> client = CopilotFeatureClient(model='gpt-5')
+    >>> client = CopilotFeatureClient(model='gpt-5.2')
     >>> await client.start()
     >>> suggestions = await client.suggest_features(
     ...     column_info={'age': 'int', 'income': 'float'},
@@ -49,7 +53,7 @@ class CopilotFeatureClient:
     >>> await client.stop()
     """
-    def __init__(self, config: Optional[CopilotConfig] = None, model: str = "gpt-5", **kwargs):
+    def __init__(self, config: Optional[CopilotConfig] = None, model: str = "gpt-5.2", **kwargs):
         self.config = config or CopilotConfig(model=model, **kwargs)
         self._client = None
         self._session = None
@@ -82,13 +86,13 @@ class CopilotFeatureClient:
             # Copilot SDK not installed - use mock mode
             self._copilot_available = False
             self._is_started = True
-            print("Warning: copilot-sdk not installed. Using mock LLM responses.")
+            logger.warning("copilot-sdk not installed. Using mock LLM responses.")
         except Exception as e:
             # Copilot not available - use mock mode
             self._copilot_available = False
             self._is_started = True
-            print(f"Warning: Could not connect to Copilot: {e}. Using mock LLM responses.")
+            logger.warning(f"Could not connect to Copilot: {e}. Using mock LLM responses.")
         return self
@@ -469,7 +473,37 @@ result = df['col1'] / (df['col2'] + 1e-8)
                 local_vars = {"df": df, "np": np, "pd": pd}
                 exec(
                     code,
-                    {"__builtins__": {"len": len, "sum": sum, "max": max, "min": min}},
+                    {
+                        "__builtins__": {
+                            "len": len,
+                            "sum": sum,
+                            "max": max,
+                            "min": min,
+                            "int": int,
+                            "float": float,
+                            "str": str,
+                            "bool": bool,
+                            "abs": abs,
+                            "round": round,
+                            "pow": pow,
+                            "range": range,
+                            "list": list,
+                            "dict": dict,
+                            "set": set,
+                            "tuple": tuple,
+                            "sorted": sorted,
+                            "reversed": reversed,
+                            "enumerate": enumerate,
+                            "zip": zip,
+                            "any": any,
+                            "all": all,
+                            "map": map,
+                            "filter": filter,
+                            "isinstance": isinstance,
+                            "hasattr": hasattr,
+                            "getattr": getattr,
+                        }
+                    },
                     local_vars,
                 )
@@ -489,33 +523,43 @@ class SyncCopilotFeatureClient:
     def __init__(self, **kwargs):
         self._async_client = CopilotFeatureClient(**kwargs)
-        self._loop = None
-    def _get_loop(self):
-        if self._loop is None or self._loop.is_closed():
+    def _run_async(self, coro):
+        """Run an async coroutine, handling nested event loops (e.g., Jupyter)."""
+        try:
+            # Check if we're in a running event loop (e.g., Jupyter)
+            loop = asyncio.get_running_loop()
+            # We're in a running loop - use nest_asyncio if available
             try:
-                self._loop = asyncio.get_event_loop()
-            except RuntimeError:
-                self._loop = asyncio.new_event_loop()
-                asyncio.set_event_loop(self._loop)
-        return self._loop
+                import nest_asyncio
+                nest_asyncio.apply()
+                return loop.run_until_complete(coro)
+            except ImportError:
+                # nest_asyncio not available, try alternative approach
+                import concurrent.futures
+                with concurrent.futures.ThreadPoolExecutor() as executor:
+                    future = executor.submit(asyncio.run, coro)
+                    return future.result()
+        except RuntimeError:
+            # No running event loop - safe to use asyncio.run
+            return asyncio.run(coro)
     def start(self):
-        return self._get_loop().run_until_complete(self._async_client.start())
+        return self._run_async(self._async_client.start())
     def stop(self):
-        return self._get_loop().run_until_complete(self._async_client.stop())
+        return self._run_async(self._async_client.stop())
     def suggest_features(self, **kwargs):
-        return self._get_loop().run_until_complete(self._async_client.suggest_features(**kwargs))
+        return self._run_async(self._async_client.suggest_features(**kwargs))
     def explain_feature(self, **kwargs):
-        return self._get_loop().run_until_complete(self._async_client.explain_feature(**kwargs))
+        return self._run_async(self._async_client.explain_feature(**kwargs))
     def generate_feature_code(self, **kwargs):
-        return self._get_loop().run_until_complete(self._async_client.generate_feature_code(**kwargs))
+        return self._run_async(self._async_client.generate_feature_code(**kwargs))
     def validate_feature_code(self, code: str, sample_data=None):
-        return self._get_loop().run_until_complete(
-            self._async_client.validate_feature_code(code=code, sample_data=sample_data)
-        )
+        return self._run_async(self._async_client.validate_feature_code(code=code, sample_data=sample_data))

{featcopilot-0.1.0 → featcopilot-0.2.0}/featcopilot/llm/explainer.py RENAMED Viewed

@@ -9,6 +9,9 @@ import pandas as pd
 from featcopilot.core.feature import Feature, FeatureSet
 from featcopilot.llm.copilot_client import SyncCopilotFeatureClient
+from featcopilot.utils.logger import get_logger
+logger = get_logger(__name__)
 class FeatureExplainer:
@@ -20,7 +23,7 @@ class FeatureExplainer:
     Parameters
     ----------
-    model : str, default='gpt-5'
+    model : str, default='gpt-5.2'
         LLM model to use
     Examples
@@ -29,7 +32,7 @@ class FeatureExplainer:
     >>> explanations = explainer.explain_features(feature_set, task='predict churn')
     """
-    def __init__(self, model: str = "gpt-5", verbose: bool = False):
+    def __init__(self, model: str = "gpt-5.2", verbose: bool = False):
         self.model = model
         self.verbose = verbose
         self._client: Optional[SyncCopilotFeatureClient] = None
@@ -115,7 +118,7 @@ class FeatureExplainer:
             except Exception as e:
                 if self.verbose:
-                    print(f"Could not explain {feature.name}: {e}")
+                    logger.error(f"Could not explain {feature.name}: {e}")
                 explanations[feature.name] = f"Feature based on: {', '.join(feature.source_columns)}"
         return explanations

featcopilot 0.1.0__tar.gz → 0.2.0__tar.gz

featcopilot 0.1.0.tar.gz → 0.2.0.tar.gz