featcopilot 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,7 +3,7 @@
3
3
  Uses contextual understanding of data to generate meaningful features.
4
4
  """
5
5
 
6
- from typing import Any, Optional, Union
6
+ from typing import Any, Literal, Optional, Union
7
7
 
8
8
  import numpy as np
9
9
  import pandas as pd
@@ -11,25 +11,30 @@ from pydantic import Field
11
11
 
12
12
  from featcopilot.core.base import BaseEngine, EngineConfig
13
13
  from featcopilot.core.feature import Feature, FeatureOrigin, FeatureSet, FeatureType
14
- from featcopilot.llm.copilot_client import SyncCopilotFeatureClient
14
+ from featcopilot.utils.logger import get_logger
15
+
16
+ logger = get_logger(__name__)
15
17
 
16
18
 
17
19
  class SemanticEngineConfig(EngineConfig):
18
20
  """Configuration for semantic feature engine."""
19
21
 
20
22
  name: str = "SemanticEngine"
21
- model: str = Field(default="gpt-5", description="LLM model to use")
23
+ model: str = Field(default="gpt-5.2", description="LLM model to use")
22
24
  max_suggestions: int = Field(default=20, description="Max features to suggest")
23
25
  validate_features: bool = Field(default=True, description="Validate generated code")
24
26
  domain: Optional[str] = Field(default=None, description="Domain context")
25
27
  temperature: float = Field(default=0.3, description="LLM temperature")
28
+ backend: Literal["copilot", "litellm"] = Field(default="copilot", description="LLM backend to use")
29
+ api_key: Optional[str] = Field(default=None, description="API key for litellm backend")
30
+ api_base: Optional[str] = Field(default=None, description="Custom API base URL for litellm")
26
31
 
27
32
 
28
33
  class SemanticEngine(BaseEngine):
29
34
  """
30
35
  LLM-powered semantic feature engineering engine.
31
36
 
32
- Uses GitHub Copilot SDK to:
37
+ Uses GitHub Copilot SDK or LiteLLM to:
33
38
  - Understand column semantics from names and descriptions
34
39
  - Generate domain-aware features
35
40
  - Create interpretable features with explanations
@@ -39,7 +44,7 @@ class SemanticEngine(BaseEngine):
39
44
 
40
45
  Parameters
41
46
  ----------
42
- model : str, default='gpt-5'
47
+ model : str, default='gpt-5.2'
43
48
  LLM model to use
44
49
  max_suggestions : int, default=20
45
50
  Maximum number of features to suggest
@@ -47,24 +52,54 @@ class SemanticEngine(BaseEngine):
47
52
  Whether to validate generated feature code
48
53
  domain : str, optional
49
54
  Domain context (e.g., 'healthcare', 'finance', 'retail')
55
+ backend : str, default='copilot'
56
+ LLM backend to use: 'copilot' or 'litellm'
57
+ api_key : str, optional
58
+ API key for litellm backend (uses environment variable if not provided)
59
+ api_base : str, optional
60
+ Custom API base URL for litellm backend (for self-hosted models)
50
61
 
51
62
  Examples
52
63
  --------
53
- >>> engine = SemanticEngine(model='gpt-5', domain='healthcare')
64
+ Using GitHub Copilot SDK (default):
65
+ >>> engine = SemanticEngine(model='gpt-5.2', domain='healthcare')
54
66
  >>> X_features = engine.fit_transform(
55
67
  ... X, y,
56
68
  ... column_descriptions={'age': 'Patient age', 'bmi': 'Body mass index'},
57
69
  ... task_description='Predict diabetes risk'
58
70
  ... )
71
+
72
+ Using LiteLLM with OpenAI:
73
+ >>> engine = SemanticEngine(
74
+ ... model='gpt-4o',
75
+ ... backend='litellm',
76
+ ... api_key='your-api-key' # or set OPENAI_API_KEY env var
77
+ ... )
78
+
79
+ Using LiteLLM with Anthropic:
80
+ >>> engine = SemanticEngine(
81
+ ... model='claude-3-opus',
82
+ ... backend='litellm'
83
+ ... )
84
+
85
+ Using LiteLLM with local Ollama:
86
+ >>> engine = SemanticEngine(
87
+ ... model='ollama/llama2',
88
+ ... backend='litellm',
89
+ ... api_base='http://localhost:11434'
90
+ ... )
59
91
  """
60
92
 
61
93
  def __init__(
62
94
  self,
63
- model: str = "gpt-5",
95
+ model: str = "gpt-5.2",
64
96
  max_suggestions: int = 20,
65
97
  validate_features: bool = True,
66
98
  domain: Optional[str] = None,
67
99
  verbose: bool = False,
100
+ backend: Literal["copilot", "litellm"] = "copilot",
101
+ api_key: Optional[str] = None,
102
+ api_base: Optional[str] = None,
68
103
  **kwargs,
69
104
  ):
70
105
  config = SemanticEngineConfig(
@@ -73,11 +108,14 @@ class SemanticEngine(BaseEngine):
73
108
  validate_features=validate_features,
74
109
  domain=domain,
75
110
  verbose=verbose,
111
+ backend=backend,
112
+ api_key=api_key,
113
+ api_base=api_base,
76
114
  **kwargs,
77
115
  )
78
116
  super().__init__(config=config)
79
117
  self.config: SemanticEngineConfig = config
80
- self._client: Optional[SyncCopilotFeatureClient] = None
118
+ self._client: Optional[Any] = None
81
119
  self._suggested_features: list[dict[str, Any]] = []
82
120
  self._feature_set = FeatureSet()
83
121
  self._column_info: dict[str, str] = {}
@@ -85,9 +123,20 @@ class SemanticEngine(BaseEngine):
85
123
  self._task_description: str = ""
86
124
 
87
125
  def _ensure_client(self) -> None:
88
- """Ensure Copilot client is initialized."""
126
+ """Ensure LLM client is initialized."""
89
127
  if self._client is None:
90
- self._client = SyncCopilotFeatureClient(model=self.config.model)
128
+ if self.config.backend == "litellm":
129
+ from featcopilot.llm.litellm_client import SyncLiteLLMFeatureClient
130
+
131
+ self._client = SyncLiteLLMFeatureClient(
132
+ model=self.config.model,
133
+ api_key=self.config.api_key,
134
+ api_base=self.config.api_base,
135
+ )
136
+ else:
137
+ from featcopilot.llm.copilot_client import SyncCopilotFeatureClient
138
+
139
+ self._client = SyncCopilotFeatureClient(model=self.config.model)
91
140
  self._client.start()
92
141
 
93
142
  def fit(
@@ -137,7 +186,7 @@ class SemanticEngine(BaseEngine):
137
186
 
138
187
  # Get LLM suggestions
139
188
  if self.config.verbose:
140
- print("SemanticEngine: Requesting feature suggestions from LLM...")
189
+ logger.info("SemanticEngine: Requesting feature suggestions from LLM...")
141
190
 
142
191
  self._suggested_features = self._client.suggest_features(
143
192
  column_info=self._column_info,
@@ -148,7 +197,7 @@ class SemanticEngine(BaseEngine):
148
197
  )
149
198
 
150
199
  if self.config.verbose:
151
- print(f"SemanticEngine: Received {len(self._suggested_features)} suggestions")
200
+ logger.info(f"SemanticEngine: Received {len(self._suggested_features)} suggestions")
152
201
 
153
202
  # Validate features if enabled
154
203
  if self.config.validate_features:
@@ -175,14 +224,14 @@ class SemanticEngine(BaseEngine):
175
224
  if result["valid"]:
176
225
  valid_features.append(feature)
177
226
  elif self.config.verbose:
178
- print(
227
+ logger.warning(
179
228
  f"SemanticEngine: Invalid feature '{feature.get('name', 'unknown')}': {result.get('error', 'unknown error')}"
180
229
  )
181
230
 
182
231
  self._suggested_features = valid_features
183
232
 
184
233
  if self.config.verbose:
185
- print(f"SemanticEngine: {len(valid_features)} valid features after validation")
234
+ logger.info(f"SemanticEngine: {len(valid_features)} valid features after validation")
186
235
 
187
236
  def _build_feature_set(self) -> None:
188
237
  """Build FeatureSet from suggestions."""
@@ -266,7 +315,7 @@ class SemanticEngine(BaseEngine):
266
315
 
267
316
  except Exception as e:
268
317
  if self.config.verbose:
269
- print(f"SemanticEngine: Error computing '{name}': {e}")
318
+ logger.error(f"SemanticEngine: Error computing '{name}': {e}")
270
319
 
271
320
  # Handle infinities and NaNs
272
321
  result = result.replace([np.inf, -np.inf], np.nan)
@@ -274,7 +323,7 @@ class SemanticEngine(BaseEngine):
274
323
  self._feature_names = successful_features
275
324
 
276
325
  if self.config.verbose:
277
- print(f"SemanticEngine: Successfully generated {len(successful_features)} features")
326
+ logger.info(f"SemanticEngine: Successfully generated {len(successful_features)} features")
278
327
 
279
328
  return result
280
329
 
@@ -6,6 +6,9 @@ import numpy as np
6
6
  import pandas as pd
7
7
 
8
8
  from featcopilot.core.base import BaseSelector
9
+ from featcopilot.utils.logger import get_logger
10
+
11
+ logger = get_logger(__name__)
9
12
 
10
13
 
11
14
  class ImportanceSelector(BaseSelector):
@@ -119,7 +122,7 @@ class ImportanceSelector(BaseSelector):
119
122
  return xgb.XGBRegressor(n_estimators=self.n_estimators, random_state=42, n_jobs=-1)
120
123
  except ImportError:
121
124
  if self.verbose:
122
- print("XGBoost not available, falling back to RandomForest")
125
+ logger.warning("XGBoost not available, falling back to RandomForest")
123
126
  return self._create_model_fallback(is_classification)
124
127
 
125
128
  else:
@@ -149,7 +152,7 @@ class ImportanceSelector(BaseSelector):
149
152
  self._selected_features = [name for name, _ in sorted_features]
150
153
 
151
154
  if self.verbose:
152
- print(f"ImportanceSelector: Selected {len(self._selected_features)} features")
155
+ logger.info(f"ImportanceSelector: Selected {len(self._selected_features)} features")
153
156
 
154
157
  def transform(self, X: Union[pd.DataFrame, np.ndarray], **kwargs) -> pd.DataFrame:
155
158
  """Select features from data."""
@@ -6,6 +6,9 @@ import numpy as np
6
6
  import pandas as pd
7
7
 
8
8
  from featcopilot.core.base import BaseSelector
9
+ from featcopilot.utils.logger import get_logger
10
+
11
+ logger = get_logger(__name__)
9
12
 
10
13
 
11
14
  class RedundancyEliminator(BaseSelector):
@@ -119,11 +122,11 @@ class RedundancyEliminator(BaseSelector):
119
122
  if imp1 >= imp2:
120
123
  to_remove.add(col2)
121
124
  if self.verbose:
122
- print(f"Removing {col2} (corr={corr:.3f} with {col1})")
125
+ logger.info(f"Removing {col2} (corr={corr:.3f} with {col1})")
123
126
  else:
124
127
  to_remove.add(col1)
125
128
  if self.verbose:
126
- print(f"Removing {col1} (corr={corr:.3f} with {col2})")
129
+ logger.info(f"Removing {col1} (corr={corr:.3f} with {col2})")
127
130
  break # col1 is removed, move to next
128
131
 
129
132
  # Selected features are those not removed
@@ -131,7 +134,7 @@ class RedundancyEliminator(BaseSelector):
131
134
  self._removed_features = list(to_remove)
132
135
 
133
136
  if self.verbose:
134
- print(f"RedundancyEliminator: Removed {len(to_remove)} redundant features")
137
+ logger.info(f"RedundancyEliminator: Removed {len(to_remove)} redundant features")
135
138
 
136
139
  def transform(self, X: Union[pd.DataFrame, np.ndarray], **kwargs) -> pd.DataFrame:
137
140
  """Remove redundant features."""
@@ -6,6 +6,9 @@ import numpy as np
6
6
  import pandas as pd
7
7
 
8
8
  from featcopilot.core.base import BaseSelector
9
+ from featcopilot.utils.logger import get_logger
10
+
11
+ logger = get_logger(__name__)
9
12
 
10
13
 
11
14
  class StatisticalSelector(BaseSelector):
@@ -173,7 +176,7 @@ class StatisticalSelector(BaseSelector):
173
176
  self._selected_features = [name for name, _ in sorted_features]
174
177
 
175
178
  if self.verbose:
176
- print(f"StatisticalSelector: Selected {len(self._selected_features)} features")
179
+ logger.info(f"StatisticalSelector: Selected {len(self._selected_features)} features")
177
180
 
178
181
  def transform(self, X: Union[pd.DataFrame, np.ndarray], **kwargs) -> pd.DataFrame:
179
182
  """
@@ -9,6 +9,9 @@ from featcopilot.core.base import BaseSelector
9
9
  from featcopilot.selection.importance import ImportanceSelector
10
10
  from featcopilot.selection.redundancy import RedundancyEliminator
11
11
  from featcopilot.selection.statistical import StatisticalSelector
12
+ from featcopilot.utils.logger import get_logger
13
+
14
+ logger = get_logger(__name__)
12
15
 
13
16
 
14
17
  class FeatureSelector(BaseSelector):
@@ -152,7 +155,7 @@ class FeatureSelector(BaseSelector):
152
155
  self._selected_features = [name for name, _ in sorted_features]
153
156
 
154
157
  if self.verbose:
155
- print(f"FeatureSelector: Selected {len(self._selected_features)} features")
158
+ logger.info(f"FeatureSelector: Selected {len(self._selected_features)} features")
156
159
 
157
160
  def transform(self, X: Union[pd.DataFrame, np.ndarray], **kwargs) -> pd.DataFrame:
158
161
  """Select features from data."""
@@ -0,0 +1,15 @@
1
+ """Feature store integrations for FeatCopilot.
2
+
3
+ Provides interfaces to save and retrieve engineered features
4
+ from popular feature stores like Feast, enabling feature reuse
5
+ and serving in production ML systems.
6
+ """
7
+
8
+ from featcopilot.stores.base import BaseFeatureStore, FeatureStoreConfig
9
+ from featcopilot.stores.feast_store import FeastFeatureStore
10
+
11
+ __all__ = [
12
+ "BaseFeatureStore",
13
+ "FeatureStoreConfig",
14
+ "FeastFeatureStore",
15
+ ]
@@ -0,0 +1,166 @@
1
+ """Base classes for feature store integrations."""
2
+
3
+ from abc import ABC, abstractmethod
4
+ from typing import Any, Optional
5
+
6
+ import pandas as pd
7
+ from pydantic import BaseModel, Field
8
+
9
+ from featcopilot.core.feature import FeatureSet
10
+
11
+
12
+ class FeatureStoreConfig(BaseModel):
13
+ """Base configuration for feature stores."""
14
+
15
+ name: str = Field(description="Feature store name")
16
+ entity_columns: list[str] = Field(default_factory=list, description="Entity/key columns")
17
+ timestamp_column: Optional[str] = Field(default=None, description="Event timestamp column")
18
+ feature_prefix: str = Field(default="", description="Prefix for feature names")
19
+ tags: dict[str, str] = Field(default_factory=dict, description="Tags/labels for features")
20
+
21
+
22
+ class BaseFeatureStore(ABC):
23
+ """
24
+ Abstract base class for feature store integrations.
25
+
26
+ Provides a unified interface for saving and retrieving
27
+ engineered features from various feature stores.
28
+
29
+ Parameters
30
+ ----------
31
+ config : FeatureStoreConfig
32
+ Configuration for the feature store
33
+
34
+ Examples
35
+ --------
36
+ >>> store = ConcreteFeatureStore(config)
37
+ >>> store.save_features(X_transformed, feature_set, feature_view_name='my_features')
38
+ >>> features = store.get_features(entity_df, feature_names=['feat1', 'feat2'])
39
+ """
40
+
41
+ def __init__(self, config: FeatureStoreConfig):
42
+ self.config = config
43
+ self._is_initialized = False
44
+
45
+ @abstractmethod
46
+ def initialize(self) -> None:
47
+ """
48
+ Initialize connection to the feature store.
49
+
50
+ This should be called before any other operations.
51
+ """
52
+ pass
53
+
54
+ @abstractmethod
55
+ def save_features(
56
+ self,
57
+ df: pd.DataFrame,
58
+ feature_set: Optional[FeatureSet] = None,
59
+ feature_view_name: str = "featcopilot_features",
60
+ description: Optional[str] = None,
61
+ **kwargs,
62
+ ) -> None:
63
+ """
64
+ Save features to the feature store.
65
+
66
+ Parameters
67
+ ----------
68
+ df : DataFrame
69
+ DataFrame containing features to save
70
+ feature_set : FeatureSet, optional
71
+ FeatCopilot FeatureSet with metadata
72
+ feature_view_name : str
73
+ Name for the feature view/table
74
+ description : str, optional
75
+ Description of the feature view
76
+ **kwargs
77
+ Additional store-specific options
78
+ """
79
+ pass
80
+
81
+ @abstractmethod
82
+ def get_features(
83
+ self,
84
+ entity_df: pd.DataFrame,
85
+ feature_names: list[str],
86
+ feature_view_name: str = "featcopilot_features",
87
+ **kwargs,
88
+ ) -> pd.DataFrame:
89
+ """
90
+ Retrieve features from the feature store.
91
+
92
+ Parameters
93
+ ----------
94
+ entity_df : DataFrame
95
+ DataFrame with entity keys and timestamps
96
+ feature_names : list
97
+ Names of features to retrieve
98
+ feature_view_name : str
99
+ Name of the feature view/table
100
+ **kwargs
101
+ Additional store-specific options
102
+
103
+ Returns
104
+ -------
105
+ DataFrame
106
+ DataFrame with requested features
107
+ """
108
+ pass
109
+
110
+ @abstractmethod
111
+ def list_feature_views(self) -> list[str]:
112
+ """
113
+ List all feature views in the store.
114
+
115
+ Returns
116
+ -------
117
+ list
118
+ Names of feature views
119
+ """
120
+ pass
121
+
122
+ @abstractmethod
123
+ def get_feature_view_schema(self, feature_view_name: str) -> dict[str, Any]:
124
+ """
125
+ Get schema/metadata for a feature view.
126
+
127
+ Parameters
128
+ ----------
129
+ feature_view_name : str
130
+ Name of the feature view
131
+
132
+ Returns
133
+ -------
134
+ dict
135
+ Schema information
136
+ """
137
+ pass
138
+
139
+ @abstractmethod
140
+ def delete_feature_view(self, feature_view_name: str) -> bool:
141
+ """
142
+ Delete a feature view.
143
+
144
+ Parameters
145
+ ----------
146
+ feature_view_name : str
147
+ Name of the feature view to delete
148
+
149
+ Returns
150
+ -------
151
+ bool
152
+ Whether deletion was successful
153
+ """
154
+ pass
155
+
156
+ def close(self) -> None:
157
+ """Close connection to the feature store."""
158
+ self._is_initialized = False
159
+
160
+ def __enter__(self):
161
+ self.initialize()
162
+ return self
163
+
164
+ def __exit__(self, exc_type, exc_val, exc_tb):
165
+ self.close()
166
+ return False