featcopilot 0.1.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,343 @@
+ """Persistent storage for transform rules.
+
+ Provides JSON-file based storage for saving, loading, and searching
+ reusable transform rules.
+ """
+
+ import json
+ import os
+ from pathlib import Path
+ from typing import Optional
+
+ from featcopilot.core.transform_rule import TransformRule
+ from featcopilot.utils.logger import get_logger
+
+ logger = get_logger(__name__)
+
+
+ class TransformRuleStore:
+     """
+     Persistent storage for transform rules.
+
+     Stores rules in a JSON file for reuse across sessions and datasets.
+     Supports searching by tags, description similarity, and column patterns.
+
+     Parameters
+     ----------
+     path : str, optional
+         Path to the JSON file for storage. Defaults to ~/.featcopilot/rules.json
+
+     Examples
+     --------
+     >>> store = TransformRuleStore()
+     >>> store.save_rule(rule)
+     >>> matching = store.find_matching_rules(columns=["price", "quantity"])
+     >>> all_rules = store.list_rules()
+     """
+
+     DEFAULT_PATH = "~/.featcopilot/rules.json"
+
+     def __init__(self, path: Optional[str] = None):
+         self.path = Path(os.path.expanduser(path or self.DEFAULT_PATH))
+         self._rules: dict[str, TransformRule] = {}
+         self._ensure_directory()
+         self._load()
+
+     def _ensure_directory(self) -> None:
+         """Ensure the storage directory exists."""
+         self.path.parent.mkdir(parents=True, exist_ok=True)
+
+     def _load(self) -> None:
+         """Load rules from storage file."""
+         if self.path.exists():
+             try:
+                 with open(self.path, encoding="utf-8") as f:
+                     data = json.load(f)
+                 self._rules = {rule_id: TransformRule.from_dict(rule_data) for rule_id, rule_data in data.items()}
+                 logger.debug(f"Loaded {len(self._rules)} rules from {self.path}")
+             except (json.JSONDecodeError, KeyError) as e:
+                 logger.warning(f"Failed to load rules from {self.path}: {e}")
+                 self._rules = {}
+         else:
+             self._rules = {}
+
+     def _save(self) -> None:
+         """Save rules to storage file."""
+         try:
+             with open(self.path, "w", encoding="utf-8") as f:
+                 data = {rule_id: rule.to_dict() for rule_id, rule in self._rules.items()}
+                 json.dump(data, f, indent=2)
+             logger.debug(f"Saved {len(self._rules)} rules to {self.path}")
+         except OSError as e:
+             logger.error(f"Failed to save rules to {self.path}: {e}")
+             raise
+
+     def save_rule(self, rule: TransformRule) -> str:
+         """
+         Save a rule to the store.
+
+         Parameters
+         ----------
+         rule : TransformRule
+             The rule to save
+
+         Returns
+         -------
+         str
+             The rule's ID
+         """
+         self._rules[rule.id] = rule
+         self._save()
+         logger.info(f"Saved rule '{rule.name}' with ID {rule.id}")
+         return rule.id
+
+     def get_rule(self, rule_id: str) -> Optional[TransformRule]:
+         """
+         Get a rule by ID.
+
+         Parameters
+         ----------
+         rule_id : str
+             The rule's ID
+
+         Returns
+         -------
+         TransformRule or None
+             The rule if found, None otherwise
+         """
+         return self._rules.get(rule_id)
+
+     def get_rule_by_name(self, name: str) -> Optional[TransformRule]:
+         """
+         Get a rule by name.
+
+         Parameters
+         ----------
+         name : str
+             The rule's name
+
+         Returns
+         -------
+         TransformRule or None
+             The first rule matching the name, None if not found
+         """
+         for rule in self._rules.values():
+             if rule.name == name:
+                 return rule
+         return None
+
+     def delete_rule(self, rule_id: str) -> bool:
+         """
+         Delete a rule by ID.
+
+         Parameters
+         ----------
+         rule_id : str
+             The rule's ID
+
+         Returns
+         -------
+         bool
+             True if deleted, False if not found
+         """
+         if rule_id in self._rules:
+             del self._rules[rule_id]
+             self._save()
+             logger.info(f"Deleted rule {rule_id}")
+             return True
+         return False
+
+     def list_rules(self, tags: Optional[list[str]] = None) -> list[TransformRule]:
+         """
+         List all rules, optionally filtered by tags.
+
+         Parameters
+         ----------
+         tags : list[str], optional
+             Filter rules that have all specified tags
+
+         Returns
+         -------
+         list[TransformRule]
+             List of matching rules
+         """
+         rules = list(self._rules.values())
+
+         if tags:
+             rules = [r for r in rules if all(t in r.tags for t in tags)]
+
+         return rules
+
+     def find_matching_rules(
+         self,
+         columns: Optional[list[str]] = None,
+         description: Optional[str] = None,
+         tags: Optional[list[str]] = None,
+         min_usage: int = 0,
+     ) -> list[tuple[TransformRule, dict[str, str]]]:
+         """
+         Find rules that can be applied to the given context.
+
+         Parameters
+         ----------
+         columns : list[str], optional
+             Available column names to match against
+         description : str, optional
+             Description to search for (keyword matching)
+         tags : list[str], optional
+             Required tags
+         min_usage : int, default=0
+             Minimum usage count
+
+         Returns
+         -------
+         list[tuple[TransformRule, dict]]
+             List of (rule, column_mapping) tuples for applicable rules,
+             sorted by usage count (most used first)
+         """
+         results: list[tuple[TransformRule, dict[str, str]]] = []
+
+         for rule in self._rules.values():
+             # Filter by usage count
+             if rule.usage_count < min_usage:
+                 continue
+
+             # Filter by tags
+             if tags and not all(t in rule.tags for t in tags):
+                 continue
+
+             # Filter by description keywords
+             if description:
+                 keywords = description.lower().split()
+                 rule_text = f"{rule.name} {rule.description}".lower()
+                 if not any(kw in rule_text for kw in keywords):
+                     continue
+
+             # Check column compatibility
+             mapping: dict[str, str] = {}
+             if columns:
+                 matches, mapping = rule.matches_columns(columns)
+                 if not matches:
+                     continue
+
+             results.append((rule, mapping))
+
+         # Sort by usage count (descending)
+         results.sort(key=lambda x: x[0].usage_count, reverse=True)
+
+         return results
+
+     def search_by_description(self, query: str, limit: int = 10) -> list[TransformRule]:
+         """
+         Search rules by description similarity.
+
+         Parameters
+         ----------
+         query : str
+             Search query
+         limit : int, default=10
+             Maximum number of results
+
+         Returns
+         -------
+         list[TransformRule]
+             Matching rules sorted by relevance
+         """
+         query_words = set(query.lower().split())
+         scored_rules: list[tuple[float, TransformRule]] = []
+
+         for rule in self._rules.values():
+             rule_words = set(f"{rule.name} {rule.description}".lower().split())
+
+             # Simple word overlap scoring
+             overlap = len(query_words & rule_words)
+             if overlap > 0:
+                 score = overlap / len(query_words)
+                 scored_rules.append((score, rule))
+
+         # Sort by score descending
+         scored_rules.sort(key=lambda x: x[0], reverse=True)
+
+         return [rule for _, rule in scored_rules[:limit]]
+
+     def import_rules(self, path: str, merge: bool = True) -> int:
+         """
+         Import rules from another JSON file.
+
+         Parameters
+         ----------
+         path : str
+             Path to import from
+         merge : bool, default=True
+             If True, merge with existing rules. If False, replace all.
+
+         Returns
+         -------
+         int
+             Number of rules imported
+         """
+         import_path = Path(os.path.expanduser(path))
+
+         if not import_path.exists():
+             raise FileNotFoundError(f"Import file not found: {path}")
+
+         with open(import_path, encoding="utf-8") as f:
+             data = json.load(f)
+
+         if not merge:
+             self._rules = {}
+
+         count = 0
+         for _rule_id, rule_data in data.items():
+             rule = TransformRule.from_dict(rule_data)
+             self._rules[rule.id] = rule
+             count += 1
+
+         self._save()
+         logger.info(f"Imported {count} rules from {path}")
+
+         return count
+
+     def export_rules(self, path: str, tags: Optional[list[str]] = None) -> int:
+         """
+         Export rules to a JSON file.
+
+         Parameters
+         ----------
+         path : str
+             Path to export to
+         tags : list[str], optional
+             Only export rules with these tags
+
+         Returns
+         -------
+         int
+             Number of rules exported
+         """
+         export_path = Path(os.path.expanduser(path))
+         export_path.parent.mkdir(parents=True, exist_ok=True)
+
+         rules_to_export = self.list_rules(tags=tags)
+
+         with open(export_path, "w", encoding="utf-8") as f:
+             data = {r.id: r.to_dict() for r in rules_to_export}
+             json.dump(data, f, indent=2)
+
+         logger.info(f"Exported {len(rules_to_export)} rules to {path}")
+
+         return len(rules_to_export)
+
+     def clear(self) -> None:
+         """Remove all rules from the store."""
+         self._rules = {}
+         self._save()
+         logger.info("Cleared all rules")
+
+     def __len__(self) -> int:
+         return len(self._rules)
+
+     def __contains__(self, rule_id: str) -> bool:
+         return rule_id in self._rules
+
+     def __iter__(self):
+         return iter(self._rules.values())
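The search in `search_by_description` above is a simple word-overlap score between the query and each rule's name plus description. A minimal standalone sketch of that scoring logic, using made-up rule names and descriptions rather than real stored rules:

```python
# Standalone sketch of the word-overlap scoring used by search_by_description.
# The (name, description) pairs below are illustrative, not actual rules.
query = "normalize price columns"
rules = [
    ("log_price", "log transform of the price column"),
    ("order_ratio", "quantity per order"),
]

query_words = set(query.lower().split())
scored = []
for name, description in rules:
    rule_words = set(f"{name} {description}".lower().split())
    overlap = len(query_words & rule_words)  # number of shared words
    if overlap > 0:
        scored.append((overlap / len(query_words), name))

scored.sort(reverse=True)  # most relevant first
print(scored)  # [(0.3333333333333333, 'log_price')]
```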
@@ -15,6 +15,9 @@ from featcopilot.engines.tabular import TabularEngine
  from featcopilot.engines.text import TextEngine
  from featcopilot.engines.timeseries import TimeSeriesEngine
  from featcopilot.selection.unified import FeatureSelector
+ from featcopilot.utils.logger import get_logger
+
+ logger = get_logger(__name__)


  class FeatureEngineerTransformer(BaseEstimator, TransformerMixin):
@@ -87,7 +90,7 @@ class AutoFeatureEngineer(BaseEstimator, TransformerMixin):
      Parameters
      ----------
      engines : list, default=['tabular']
-         Engines to use ('tabular', 'timeseries', 'text', 'llm')
+         Engines to use ('tabular', 'timeseries', 'relational', 'text', 'llm')
      max_features : int, optional
          Maximum features to generate/select
      selection_methods : list, default=['mutual_info', 'importance']
@@ -102,7 +105,7 @@ class AutoFeatureEngineer(BaseEstimator, TransformerMixin):
      >>> engineer = AutoFeatureEngineer(
      ...     engines=['tabular', 'llm'],
      ...     max_features=100,
-     ...     llm_config={'model': 'gpt-5', 'enable_semantic': True}
+     ...     llm_config={'model': 'gpt-5.2', 'enable_semantic': True}
      ... )
      >>> X_transformed = engineer.fit_transform(X, y)
      """
@@ -183,7 +186,7 @@ class AutoFeatureEngineer(BaseEstimator, TransformerMixin):
              self._engine_instances[engine_name] = engine

              if self.verbose:
-                 print(f"Fitted {engine_name} engine")
+                 logger.info(f"Fitted {engine_name} engine")

          self._is_fitted = True
          return self
@@ -196,11 +199,13 @@ class AutoFeatureEngineer(BaseEstimator, TransformerMixin):
          return TimeSeriesEngine(max_features=self.max_features, verbose=self.verbose)
      elif engine_name == "text":
          return TextEngine(max_features=self.max_features, verbose=self.verbose)
+     elif engine_name == "relational":
+         return RelationalEngine(max_features=self.max_features, verbose=self.verbose)
      elif engine_name == "llm":
          from featcopilot.llm.semantic_engine import SemanticEngine

          return SemanticEngine(
-             model=self.llm_config.get("model", "gpt-5"),
+             model=self.llm_config.get("model", "gpt-5.2"),
              max_suggestions=self.llm_config.get("max_suggestions", 20),
              domain=self.llm_config.get("domain"),
              verbose=self.verbose,
@@ -242,7 +247,7 @@ class AutoFeatureEngineer(BaseEstimator, TransformerMixin):
                  result[col] = transformed[col]

              if self.verbose:
-                 print(f"{engine_name}: Added {len(new_cols)} features")
+                 logger.info(f"{engine_name}: Added {len(new_cols)} features")

          # Handle infinities and NaNs
          result = result.replace([np.inf, -np.inf], np.nan)
@@ -291,18 +296,25 @@ class AutoFeatureEngineer(BaseEstimator, TransformerMixin):
          self.fit(X, y, column_descriptions, task_description, **fit_params)
          result = self.transform(X)

+         # Track original features (input columns) vs derived features
+         if isinstance(X, np.ndarray):
+             original_features = {f"feature_{i}" for i in range(X.shape[1])}
+         else:
+             original_features = set(X.columns)
+
          # Apply feature selection if enabled and y is provided
          if apply_selection and y is not None and self.max_features:
              self._selector = FeatureSelector(
                  methods=self.selection_methods,
                  max_features=self.max_features,
                  correlation_threshold=self.correlation_threshold,
+                 original_features=original_features,
                  verbose=self.verbose,
              )
              result = self._selector.fit_transform(result, y)

              if self.verbose:
-                 print(f"Selected {len(self._selector.get_selected_features())} features")
+                 logger.info(f"Selected {len(self._selector.get_selected_features())} features")

          return result

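Taken together, `fit_transform` now fits the configured engines, transforms the input, records which feature names came from the input columns, and (when `y` and `max_features` are given) runs `FeatureSelector` over the result. A minimal usage sketch; the top-level import path and the sample data are assumptions for illustration:

```python
# Usage sketch of the fit_transform flow shown in this diff.
# Import path and DataFrame contents are illustrative assumptions.
import pandas as pd
from featcopilot import AutoFeatureEngineer

X = pd.DataFrame({"price": [9.5, 12.0, 7.25, 4.0], "quantity": [3, 1, 4, 2]})
y = [0, 1, 0, 1]

engineer = AutoFeatureEngineer(engines=["tabular"], max_features=20)
X_new = engineer.fit_transform(X, y)  # fit engines, transform, then select
print(X_new.shape)
```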
@@ -1,9 +1,23 @@
  """Utility functions and classes."""

  from featcopilot.utils.cache import FeatureCache
+ from featcopilot.utils.models import (
+     fetch_models,
+     get_default_model,
+     get_model_info,
+     get_model_names,
+     is_valid_model,
+     list_models,
+ )
  from featcopilot.utils.parallel import parallel_apply

  __all__ = [
      "parallel_apply",
      "FeatureCache",
+     "fetch_models",
+     "list_models",
+     "get_model_info",
+     "get_default_model",
+     "get_model_names",
+     "is_valid_model",
  ]
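The new `featcopilot.utils.models` helpers are re-exported from `featcopilot.utils`. Only the names appear in this diff, so the call signatures in the sketch below are assumptions:

```python
# Sketch only: the diff shows these exported names but not their signatures,
# so the argument-free calls here are assumptions.
from featcopilot.utils import get_default_model, is_valid_model, list_models

print(list_models())          # assumed: enumerate known models
model = get_default_model()   # assumed: no-arg default lookup
print(is_valid_model(model))  # expected True for the default model
```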
@@ -0,0 +1,47 @@
+ """Centralized logging configuration for featcopilot."""
+
+ from __future__ import annotations
+
+ import logging
+ import sys
+
+ # Create the logger
+ logger = logging.getLogger("featcopilot")
+
+ # Default handler with line number format
+ _handler = logging.StreamHandler(sys.stderr)
+ _formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(filename)s:%(lineno)d - %(message)s")
+ _handler.setFormatter(_formatter)
+
+ # Only add handler if not already configured
+ if not logger.handlers:
+     logger.addHandler(_handler)
+     logger.setLevel(logging.INFO)
+
+
+ def get_logger(name: str | None = None) -> logging.Logger:
+     """Get a logger instance.
+
+     Args:
+         name: Optional name for the logger. If None, returns the root featcopilot logger.
+
+     Returns:
+         A configured logger instance.
+     """
+     if name:
+         # Strip 'featcopilot.' prefix if present to avoid duplication
+         if name.startswith("featcopilot."):
+             name = name[len("featcopilot.") :]
+         return logging.getLogger(f"featcopilot.{name}")
+     return logger
+
+
+ def set_level(level: int | str) -> None:
+     """Set the logging level.
+
+     Args:
+         level: Logging level (e.g., logging.DEBUG, logging.INFO, "DEBUG", "INFO")
+     """
+     if isinstance(level, str):
+         level = getattr(logging, level.upper())
+     logger.setLevel(level)
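Usage follows directly from the definitions above: `get_logger` namespaces loggers under the shared `featcopilot` logger (stripping an existing `featcopilot.` prefix to avoid `featcopilot.featcopilot.*` names), and `set_level` accepts either a `logging` constant or a level name:

```python
# Usage sketch for the logging helpers defined above.
import logging
from featcopilot.utils.logger import get_logger, set_level

log = get_logger(__name__)  # child of the shared 'featcopilot' logger
set_level("DEBUG")          # equivalent to set_level(logging.DEBUG)
log.debug("handler and level are configured on the 'featcopilot' logger")
```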