featcopilot 0.1.0__tar.gz → 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {featcopilot-0.1.0 → featcopilot-0.2.0}/PKG-INFO +32 -9
- {featcopilot-0.1.0 → featcopilot-0.2.0}/README.md +16 -8
- {featcopilot-0.1.0 → featcopilot-0.2.0}/featcopilot/__init__.py +3 -1
- {featcopilot-0.1.0 → featcopilot-0.2.0}/featcopilot/core/feature.py +5 -1
- {featcopilot-0.1.0 → featcopilot-0.2.0}/featcopilot/engines/relational.py +5 -2
- {featcopilot-0.1.0 → featcopilot-0.2.0}/featcopilot/engines/tabular.py +6 -3
- {featcopilot-0.1.0 → featcopilot-0.2.0}/featcopilot/engines/text.py +6 -3
- {featcopilot-0.1.0 → featcopilot-0.2.0}/featcopilot/engines/timeseries.py +5 -2
- {featcopilot-0.1.0 → featcopilot-0.2.0}/featcopilot/llm/__init__.py +4 -1
- {featcopilot-0.1.0 → featcopilot-0.2.0}/featcopilot/llm/code_generator.py +7 -4
- {featcopilot-0.1.0 → featcopilot-0.2.0}/featcopilot/llm/copilot_client.py +67 -23
- {featcopilot-0.1.0 → featcopilot-0.2.0}/featcopilot/llm/explainer.py +6 -3
- featcopilot-0.2.0/featcopilot/llm/litellm_client.py +595 -0
- {featcopilot-0.1.0 → featcopilot-0.2.0}/featcopilot/llm/semantic_engine.py +65 -16
- {featcopilot-0.1.0 → featcopilot-0.2.0}/featcopilot/selection/importance.py +5 -2
- {featcopilot-0.1.0 → featcopilot-0.2.0}/featcopilot/selection/redundancy.py +6 -3
- {featcopilot-0.1.0 → featcopilot-0.2.0}/featcopilot/selection/statistical.py +4 -1
- {featcopilot-0.1.0 → featcopilot-0.2.0}/featcopilot/selection/unified.py +4 -1
- featcopilot-0.2.0/featcopilot/stores/__init__.py +15 -0
- featcopilot-0.2.0/featcopilot/stores/base.py +166 -0
- featcopilot-0.2.0/featcopilot/stores/feast_store.py +541 -0
- {featcopilot-0.1.0 → featcopilot-0.2.0}/featcopilot/transformers/sklearn_compat.py +8 -5
- featcopilot-0.2.0/featcopilot/utils/__init__.py +23 -0
- featcopilot-0.2.0/featcopilot/utils/logger.py +47 -0
- featcopilot-0.2.0/featcopilot/utils/models.py +287 -0
- {featcopilot-0.1.0 → featcopilot-0.2.0}/featcopilot/utils/parallel.py +5 -1
- {featcopilot-0.1.0 → featcopilot-0.2.0}/featcopilot.egg-info/PKG-INFO +32 -9
- {featcopilot-0.1.0 → featcopilot-0.2.0}/featcopilot.egg-info/SOURCES.txt +9 -1
- {featcopilot-0.1.0 → featcopilot-0.2.0}/featcopilot.egg-info/requires.txt +18 -0
- {featcopilot-0.1.0 → featcopilot-0.2.0}/pyproject.toml +19 -1
- featcopilot-0.2.0/tests/test_litellm.py +249 -0
- featcopilot-0.2.0/tests/test_stores.py +261 -0
- featcopilot-0.1.0/featcopilot/utils/__init__.py +0 -9
- {featcopilot-0.1.0 → featcopilot-0.2.0}/featcopilot/core/__init__.py +0 -0
- {featcopilot-0.1.0 → featcopilot-0.2.0}/featcopilot/core/base.py +0 -0
- {featcopilot-0.1.0 → featcopilot-0.2.0}/featcopilot/core/registry.py +0 -0
- {featcopilot-0.1.0 → featcopilot-0.2.0}/featcopilot/engines/__init__.py +0 -0
- {featcopilot-0.1.0 → featcopilot-0.2.0}/featcopilot/selection/__init__.py +0 -0
- {featcopilot-0.1.0 → featcopilot-0.2.0}/featcopilot/transformers/__init__.py +0 -0
- {featcopilot-0.1.0 → featcopilot-0.2.0}/featcopilot/utils/cache.py +0 -0
- {featcopilot-0.1.0 → featcopilot-0.2.0}/featcopilot.egg-info/dependency_links.txt +0 -0
- {featcopilot-0.1.0 → featcopilot-0.2.0}/featcopilot.egg-info/top_level.txt +0 -0
- {featcopilot-0.1.0 → featcopilot-0.2.0}/setup.cfg +0 -0
- {featcopilot-0.1.0 → featcopilot-0.2.0}/tests/test_autofeat.py +0 -0
- {featcopilot-0.1.0 → featcopilot-0.2.0}/tests/test_core.py +0 -0
- {featcopilot-0.1.0 → featcopilot-0.2.0}/tests/test_engines.py +0 -0
- {featcopilot-0.1.0 → featcopilot-0.2.0}/tests/test_selection.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: featcopilot
|
|
3
|
-
Version: 0.1.0
|
|
3
|
+
Version: 0.2.0
|
|
4
4
|
Summary: Next-generation LLM-powered auto feature engineering framework with GitHub Copilot SDK
|
|
5
5
|
Author: FeatCopilot Contributors
|
|
6
6
|
License: MIT
|
|
@@ -28,11 +28,26 @@ Requires-Dist: pydantic>=2.0.0
|
|
|
28
28
|
Requires-Dist: joblib>=1.1.0
|
|
29
29
|
Provides-Extra: llm
|
|
30
30
|
Requires-Dist: github-copilot-sdk>=0.1.0; extra == "llm"
|
|
31
|
+
Requires-Dist: nest_asyncio>=1.5.0; extra == "llm"
|
|
32
|
+
Provides-Extra: litellm
|
|
33
|
+
Requires-Dist: litellm>=1.0.0; extra == "litellm"
|
|
34
|
+
Requires-Dist: nest_asyncio>=1.5.0; extra == "litellm"
|
|
31
35
|
Provides-Extra: timeseries
|
|
32
36
|
Requires-Dist: statsmodels>=0.13.0; extra == "timeseries"
|
|
37
|
+
Provides-Extra: feast
|
|
38
|
+
Requires-Dist: feast>=0.30.0; extra == "feast"
|
|
33
39
|
Provides-Extra: full
|
|
34
40
|
Requires-Dist: github-copilot-sdk>=0.1.0; extra == "full"
|
|
41
|
+
Requires-Dist: litellm>=1.0.0; extra == "full"
|
|
35
42
|
Requires-Dist: statsmodels>=0.13.0; extra == "full"
|
|
43
|
+
Requires-Dist: feast>=0.30.0; extra == "full"
|
|
44
|
+
Requires-Dist: nest_asyncio>=1.5.0; extra == "full"
|
|
45
|
+
Provides-Extra: benchmark
|
|
46
|
+
Requires-Dist: github-copilot-sdk>=0.1.0; extra == "benchmark"
|
|
47
|
+
Requires-Dist: statsmodels>=0.13.0; extra == "benchmark"
|
|
48
|
+
Requires-Dist: flaml[automl,blendsearch]>=2.0.0; extra == "benchmark"
|
|
49
|
+
Requires-Dist: autogluon.tabular>=1.0.0; extra == "benchmark"
|
|
50
|
+
Requires-Dist: h2o>=3.40.0; extra == "benchmark"
|
|
36
51
|
Provides-Extra: dev
|
|
37
52
|
Requires-Dist: pytest>=7.0.0; extra == "dev"
|
|
38
53
|
Requires-Dist: pytest-asyncio>=0.21.0; extra == "dev"
|
|
@@ -44,9 +59,9 @@ Requires-Dist: pre-commit>=3.6.0; extra == "dev"
|
|
|
44
59
|
|
|
45
60
|
# FeatCopilot 🚀
|
|
46
61
|
|
|
47
|
-
**Next-Generation LLM-Powered Auto Feature Engineering
|
|
62
|
+
**Next-Generation LLM-Powered Auto Feature Engineering Framework**
|
|
48
63
|
|
|
49
|
-
FeatCopilot
|
|
64
|
+
FeatCopilot automatically generates, selects, and explains predictive features using semantic understanding. It analyzes column meanings, applies domain-aware transformations, and provides human-readable explanations—turning raw data into ML-ready features in seconds.
|
|
50
65
|
|
|
51
66
|
## 📊 Benchmark Highlights
|
|
52
67
|
|
|
@@ -59,7 +74,7 @@ FeatCopilot is a unified feature engineering framework that combines the best ap
|
|
|
59
74
|
| Classification | +0.54% | +4.35% |
|
|
60
75
|
| Regression | +0.65% | +5.57% |
|
|
61
76
|
|
|
62
|
-
### LLM Engine (With
|
|
77
|
+
### LLM Engine (With LiteLLM - 30-60s)
|
|
63
78
|
|
|
64
79
|
| Task Type | Average Improvement | Best Case |
|
|
65
80
|
|-----------|--------------------:|----------:|
|
|
@@ -87,7 +102,7 @@ FeatCopilot is a unified feature engineering framework that combines the best ap
|
|
|
87
102
|
# Basic installation
|
|
88
103
|
pip install featcopilot
|
|
89
104
|
|
|
90
|
-
# With LLM capabilities
|
|
105
|
+
# With LLM capabilities
|
|
91
106
|
pip install featcopilot[llm]
|
|
92
107
|
|
|
93
108
|
# Full installation
|
|
@@ -111,7 +126,7 @@ X_transformed = engineer.fit_transform(X, y) # <1 second
|
|
|
111
126
|
print(f"Features: {X.shape[1]} -> {X_transformed.shape[1]}")
|
|
112
127
|
```
|
|
113
128
|
|
|
114
|
-
### LLM Mode (With
|
|
129
|
+
### LLM Mode (With LiteLLM)
|
|
115
130
|
|
|
116
131
|
```python
|
|
117
132
|
from featcopilot import AutoFeatureEngineer
|
|
@@ -164,16 +179,24 @@ engine = TimeSeriesEngine(
|
|
|
164
179
|
```
|
|
165
180
|
|
|
166
181
|
### LLM Engine
|
|
167
|
-
Uses GitHub Copilot SDK for intelligent feature generation.
|
|
182
|
+
Uses GitHub Copilot SDK (default) or LiteLLM (100+ providers) for intelligent feature generation.
|
|
168
183
|
|
|
169
184
|
```python
|
|
170
185
|
from featcopilot.llm import SemanticEngine
|
|
171
186
|
|
|
187
|
+
# Default: GitHub Copilot SDK
|
|
172
188
|
engine = SemanticEngine(
|
|
173
|
-
model='gpt-5',
|
|
189
|
+
model='gpt-5.2',
|
|
174
190
|
max_suggestions=20,
|
|
175
191
|
validate_features=True
|
|
176
192
|
)
|
|
193
|
+
|
|
194
|
+
# Alternative: LiteLLM backend
|
|
195
|
+
engine = SemanticEngine(
|
|
196
|
+
model='gpt-4o',
|
|
197
|
+
backend='litellm',
|
|
198
|
+
max_suggestions=20
|
|
199
|
+
)
|
|
177
200
|
```
|
|
178
201
|
|
|
179
202
|
## Feature Selection
|
|
@@ -211,7 +234,7 @@ X_selected = selector.fit_transform(X, y)
|
|
|
211
234
|
|
|
212
235
|
- Python 3.9+
|
|
213
236
|
- NumPy, Pandas, Scikit-learn
|
|
214
|
-
- GitHub Copilot
|
|
237
|
+
- GitHub Copilot SDK (default) or LiteLLM (for 100+ LLM providers)
|
|
215
238
|
|
|
216
239
|
## License
|
|
217
240
|
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
# FeatCopilot 🚀
|
|
2
2
|
|
|
3
|
-
**Next-Generation LLM-Powered Auto Feature Engineering
|
|
3
|
+
**Next-Generation LLM-Powered Auto Feature Engineering Framework**
|
|
4
4
|
|
|
5
|
-
FeatCopilot
|
|
5
|
+
FeatCopilot automatically generates, selects, and explains predictive features using semantic understanding. It analyzes column meanings, applies domain-aware transformations, and provides human-readable explanations—turning raw data into ML-ready features in seconds.
|
|
6
6
|
|
|
7
7
|
## 📊 Benchmark Highlights
|
|
8
8
|
|
|
@@ -15,7 +15,7 @@ FeatCopilot is a unified feature engineering framework that combines the best ap
|
|
|
15
15
|
| Classification | +0.54% | +4.35% |
|
|
16
16
|
| Regression | +0.65% | +5.57% |
|
|
17
17
|
|
|
18
|
-
### LLM Engine (With
|
|
18
|
+
### LLM Engine (With LiteLLM - 30-60s)
|
|
19
19
|
|
|
20
20
|
| Task Type | Average Improvement | Best Case |
|
|
21
21
|
|-----------|--------------------:|----------:|
|
|
@@ -43,7 +43,7 @@ FeatCopilot is a unified feature engineering framework that combines the best ap
|
|
|
43
43
|
# Basic installation
|
|
44
44
|
pip install featcopilot
|
|
45
45
|
|
|
46
|
-
# With LLM capabilities
|
|
46
|
+
# With LLM capabilities
|
|
47
47
|
pip install featcopilot[llm]
|
|
48
48
|
|
|
49
49
|
# Full installation
|
|
@@ -67,7 +67,7 @@ X_transformed = engineer.fit_transform(X, y) # <1 second
|
|
|
67
67
|
print(f"Features: {X.shape[1]} -> {X_transformed.shape[1]}")
|
|
68
68
|
```
|
|
69
69
|
|
|
70
|
-
### LLM Mode (With
|
|
70
|
+
### LLM Mode (With LiteLLM)
|
|
71
71
|
|
|
72
72
|
```python
|
|
73
73
|
from featcopilot import AutoFeatureEngineer
|
|
@@ -120,16 +120,24 @@ engine = TimeSeriesEngine(
|
|
|
120
120
|
```
|
|
121
121
|
|
|
122
122
|
### LLM Engine
|
|
123
|
-
Uses GitHub Copilot SDK for intelligent feature generation.
|
|
123
|
+
Uses GitHub Copilot SDK (default) or LiteLLM (100+ providers) for intelligent feature generation.
|
|
124
124
|
|
|
125
125
|
```python
|
|
126
126
|
from featcopilot.llm import SemanticEngine
|
|
127
127
|
|
|
128
|
+
# Default: GitHub Copilot SDK
|
|
128
129
|
engine = SemanticEngine(
|
|
129
|
-
model='gpt-5',
|
|
130
|
+
model='gpt-5.2',
|
|
130
131
|
max_suggestions=20,
|
|
131
132
|
validate_features=True
|
|
132
133
|
)
|
|
134
|
+
|
|
135
|
+
# Alternative: LiteLLM backend
|
|
136
|
+
engine = SemanticEngine(
|
|
137
|
+
model='gpt-4o',
|
|
138
|
+
backend='litellm',
|
|
139
|
+
max_suggestions=20
|
|
140
|
+
)
|
|
133
141
|
```
|
|
134
142
|
|
|
135
143
|
## Feature Selection
|
|
@@ -167,7 +175,7 @@ X_selected = selector.fit_transform(X, y)
|
|
|
167
175
|
|
|
168
176
|
- Python 3.9+
|
|
169
177
|
- NumPy, Pandas, Scikit-learn
|
|
170
|
-
- GitHub Copilot
|
|
178
|
+
- GitHub Copilot SDK (default) or LiteLLM (for 100+ LLM providers)
|
|
171
179
|
|
|
172
180
|
## License
|
|
173
181
|
|
|
@@ -5,7 +5,9 @@ A unified feature engineering framework combining traditional approaches
|
|
|
5
5
|
with novel LLM-powered capabilities via GitHub Copilot SDK.
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
|
-
|
|
8
|
+
from importlib.metadata import version
|
|
9
|
+
|
|
10
|
+
__version__ = version("featcopilot")
|
|
9
11
|
__author__ = "FeatCopilot Contributors"
|
|
10
12
|
|
|
11
13
|
from featcopilot.core.base import BaseEngine, BaseSelector
|
|
@@ -7,6 +7,10 @@ from typing import Any, Optional
|
|
|
7
7
|
import numpy as np
|
|
8
8
|
import pandas as pd
|
|
9
9
|
|
|
10
|
+
from featcopilot.utils.logger import get_logger
|
|
11
|
+
|
|
12
|
+
logger = get_logger(__name__)
|
|
13
|
+
|
|
10
14
|
|
|
11
15
|
class FeatureType(Enum):
|
|
12
16
|
"""Types of features."""
|
|
@@ -220,5 +224,5 @@ class FeatureSet:
|
|
|
220
224
|
result[feature.name] = feature.compute(df)
|
|
221
225
|
except Exception as e:
|
|
222
226
|
# Log warning but continue
|
|
223
|
-
|
|
227
|
+
logger.warning(f"Could not compute feature {feature.name}: {e}")
|
|
224
228
|
return result
|
|
@@ -11,6 +11,9 @@ from pydantic import Field
|
|
|
11
11
|
|
|
12
12
|
from featcopilot.core.base import BaseEngine, EngineConfig
|
|
13
13
|
from featcopilot.core.feature import FeatureSet
|
|
14
|
+
from featcopilot.utils.logger import get_logger
|
|
15
|
+
|
|
16
|
+
logger = get_logger(__name__)
|
|
14
17
|
|
|
15
18
|
|
|
16
19
|
class RelationalEngineConfig(EngineConfig):
|
|
@@ -141,7 +144,7 @@ class RelationalEngine(BaseEngine):
|
|
|
141
144
|
self._primary_columns = X.columns.tolist()
|
|
142
145
|
|
|
143
146
|
if self.config.verbose:
|
|
144
|
-
|
|
147
|
+
logger.info(f"RelationalEngine: {len(self._relationships)} relationships defined")
|
|
145
148
|
|
|
146
149
|
self._is_fitted = True
|
|
147
150
|
return self
|
|
@@ -191,7 +194,7 @@ class RelationalEngine(BaseEngine):
|
|
|
191
194
|
self._feature_names = [c for c in result.columns if c not in X.columns]
|
|
192
195
|
|
|
193
196
|
if self.config.verbose:
|
|
194
|
-
|
|
197
|
+
logger.info(f"RelationalEngine: Generated {len(self._feature_names)} features")
|
|
195
198
|
|
|
196
199
|
return result
|
|
197
200
|
|
|
@@ -12,6 +12,9 @@ from pydantic import Field
|
|
|
12
12
|
|
|
13
13
|
from featcopilot.core.base import BaseEngine, EngineConfig
|
|
14
14
|
from featcopilot.core.feature import Feature, FeatureOrigin, FeatureSet, FeatureType
|
|
15
|
+
from featcopilot.utils.logger import get_logger
|
|
16
|
+
|
|
17
|
+
logger = get_logger(__name__)
|
|
15
18
|
|
|
16
19
|
|
|
17
20
|
class TabularEngineConfig(EngineConfig):
|
|
@@ -124,7 +127,7 @@ class TabularEngine(BaseEngine):
|
|
|
124
127
|
]
|
|
125
128
|
|
|
126
129
|
if self.config.verbose:
|
|
127
|
-
|
|
130
|
+
logger.info(f"TabularEngine: Found {len(self._numeric_columns)} numeric columns")
|
|
128
131
|
|
|
129
132
|
# Plan features to generate
|
|
130
133
|
self._plan_features(X)
|
|
@@ -207,7 +210,7 @@ class TabularEngine(BaseEngine):
|
|
|
207
210
|
self._feature_set.add(feature)
|
|
208
211
|
|
|
209
212
|
if self.config.verbose:
|
|
210
|
-
|
|
213
|
+
logger.info(f"TabularEngine: Planned {len(self._feature_set)} features")
|
|
211
214
|
|
|
212
215
|
def transform(self, X: Union[pd.DataFrame, np.ndarray], **kwargs) -> pd.DataFrame:
|
|
213
216
|
"""
|
|
@@ -284,7 +287,7 @@ class TabularEngine(BaseEngine):
|
|
|
284
287
|
self._feature_names = [c for c in result.columns if c not in X.columns]
|
|
285
288
|
|
|
286
289
|
if self.config.verbose:
|
|
287
|
-
|
|
290
|
+
logger.info(f"TabularEngine: Generated {len(self._feature_names)} features")
|
|
288
291
|
|
|
289
292
|
return result
|
|
290
293
|
|
|
@@ -11,6 +11,9 @@ from pydantic import Field
|
|
|
11
11
|
|
|
12
12
|
from featcopilot.core.base import BaseEngine, EngineConfig
|
|
13
13
|
from featcopilot.core.feature import FeatureSet
|
|
14
|
+
from featcopilot.utils.logger import get_logger
|
|
15
|
+
|
|
16
|
+
logger = get_logger(__name__)
|
|
14
17
|
|
|
15
18
|
|
|
16
19
|
class TextEngineConfig(EngineConfig):
|
|
@@ -106,7 +109,7 @@ class TextEngine(BaseEngine):
|
|
|
106
109
|
]
|
|
107
110
|
|
|
108
111
|
if self.config.verbose:
|
|
109
|
-
|
|
112
|
+
logger.info(f"TextEngine: Found {len(self._text_columns)} text columns")
|
|
110
113
|
|
|
111
114
|
# Fit TF-IDF vectorizers if needed
|
|
112
115
|
if "tfidf" in self.config.features:
|
|
@@ -135,7 +138,7 @@ class TextEngine(BaseEngine):
|
|
|
135
138
|
|
|
136
139
|
except ImportError:
|
|
137
140
|
if self.config.verbose:
|
|
138
|
-
|
|
141
|
+
logger.warning("TextEngine: sklearn not available for TF-IDF, skipping")
|
|
139
142
|
|
|
140
143
|
def transform(self, X: Union[pd.DataFrame, np.ndarray], **kwargs) -> pd.DataFrame:
|
|
141
144
|
"""
|
|
@@ -191,7 +194,7 @@ class TextEngine(BaseEngine):
|
|
|
191
194
|
self._feature_names = [c for c in result.columns if c not in X.columns]
|
|
192
195
|
|
|
193
196
|
if self.config.verbose:
|
|
194
|
-
|
|
197
|
+
logger.info(f"TextEngine: Extracted {len(self._feature_names)} features")
|
|
195
198
|
|
|
196
199
|
return result
|
|
197
200
|
|
|
@@ -12,6 +12,9 @@ from pydantic import Field
|
|
|
12
12
|
|
|
13
13
|
from featcopilot.core.base import BaseEngine, EngineConfig
|
|
14
14
|
from featcopilot.core.feature import FeatureSet
|
|
15
|
+
from featcopilot.utils.logger import get_logger
|
|
16
|
+
|
|
17
|
+
logger = get_logger(__name__)
|
|
15
18
|
|
|
16
19
|
|
|
17
20
|
class TimeSeriesEngineConfig(EngineConfig):
|
|
@@ -123,7 +126,7 @@ class TimeSeriesEngine(BaseEngine):
|
|
|
123
126
|
self._time_columns = X.select_dtypes(include=[np.number]).columns.tolist()
|
|
124
127
|
|
|
125
128
|
if self.config.verbose:
|
|
126
|
-
|
|
129
|
+
logger.info(f"TimeSeriesEngine: Found {len(self._time_columns)} numeric columns")
|
|
127
130
|
|
|
128
131
|
self._is_fitted = True
|
|
129
132
|
return self
|
|
@@ -177,7 +180,7 @@ class TimeSeriesEngine(BaseEngine):
|
|
|
177
180
|
self._feature_names = list(result.columns)
|
|
178
181
|
|
|
179
182
|
if self.config.verbose:
|
|
180
|
-
|
|
183
|
+
logger.info(f"TimeSeriesEngine: Extracted {len(self._feature_names)} features")
|
|
181
184
|
|
|
182
185
|
return result
|
|
183
186
|
|
|
@@ -1,15 +1,18 @@
|
|
|
1
1
|
"""LLM-powered feature engineering module.
|
|
2
2
|
|
|
3
|
-
Uses GitHub Copilot SDK for intelligent feature generation.
|
|
3
|
+
Uses GitHub Copilot SDK or LiteLLM for intelligent feature generation.
|
|
4
4
|
"""
|
|
5
5
|
|
|
6
6
|
from featcopilot.llm.code_generator import FeatureCodeGenerator
|
|
7
7
|
from featcopilot.llm.copilot_client import CopilotFeatureClient
|
|
8
8
|
from featcopilot.llm.explainer import FeatureExplainer
|
|
9
|
+
from featcopilot.llm.litellm_client import LiteLLMFeatureClient, SyncLiteLLMFeatureClient
|
|
9
10
|
from featcopilot.llm.semantic_engine import SemanticEngine
|
|
10
11
|
|
|
11
12
|
__all__ = [
|
|
12
13
|
"CopilotFeatureClient",
|
|
14
|
+
"LiteLLMFeatureClient",
|
|
15
|
+
"SyncLiteLLMFeatureClient",
|
|
13
16
|
"SemanticEngine",
|
|
14
17
|
"FeatureExplainer",
|
|
15
18
|
"FeatureCodeGenerator",
|
|
@@ -10,6 +10,9 @@ import pandas as pd
|
|
|
10
10
|
|
|
11
11
|
from featcopilot.core.feature import Feature, FeatureOrigin, FeatureType
|
|
12
12
|
from featcopilot.llm.copilot_client import SyncCopilotFeatureClient
|
|
13
|
+
from featcopilot.utils.logger import get_logger
|
|
14
|
+
|
|
15
|
+
logger = get_logger(__name__)
|
|
13
16
|
|
|
14
17
|
|
|
15
18
|
class FeatureCodeGenerator:
|
|
@@ -21,7 +24,7 @@ class FeatureCodeGenerator:
|
|
|
21
24
|
|
|
22
25
|
Parameters
|
|
23
26
|
----------
|
|
24
|
-
model : str, default='gpt-5'
|
|
27
|
+
model : str, default='gpt-5.2'
|
|
25
28
|
LLM model to use
|
|
26
29
|
validate : bool, default=True
|
|
27
30
|
Whether to validate generated code
|
|
@@ -35,7 +38,7 @@ class FeatureCodeGenerator:
|
|
|
35
38
|
... )
|
|
36
39
|
"""
|
|
37
40
|
|
|
38
|
-
def __init__(self, model: str = "gpt-5", validate: bool = True, verbose: bool = False):
|
|
41
|
+
def __init__(self, model: str = "gpt-5.2", validate: bool = True, verbose: bool = False):
|
|
39
42
|
self.model = model
|
|
40
43
|
self.validate = validate
|
|
41
44
|
self.verbose = verbose
|
|
@@ -98,7 +101,7 @@ class FeatureCodeGenerator:
|
|
|
98
101
|
)
|
|
99
102
|
if not validation["valid"]:
|
|
100
103
|
if self.verbose:
|
|
101
|
-
|
|
104
|
+
logger.warning(f"Code validation failed: {validation['error']}")
|
|
102
105
|
# Try to fix common issues
|
|
103
106
|
code = self._fix_common_issues(code, validation["error"])
|
|
104
107
|
|
|
@@ -144,7 +147,7 @@ class FeatureCodeGenerator:
|
|
|
144
147
|
features.append(feature)
|
|
145
148
|
except Exception as e:
|
|
146
149
|
if self.verbose:
|
|
147
|
-
|
|
150
|
+
logger.error(f"Failed to generate feature for '{desc}': {e}")
|
|
148
151
|
|
|
149
152
|
return features
|
|
150
153
|
|
|
@@ -10,11 +10,15 @@ from typing import Any, Optional
|
|
|
10
10
|
|
|
11
11
|
from pydantic import BaseModel, Field
|
|
12
12
|
|
|
13
|
+
from featcopilot.utils.logger import get_logger
|
|
14
|
+
|
|
15
|
+
logger = get_logger(__name__)
|
|
16
|
+
|
|
13
17
|
|
|
14
18
|
class CopilotConfig(BaseModel):
|
|
15
19
|
"""Configuration for Copilot client."""
|
|
16
20
|
|
|
17
|
-
model: str = Field(default="gpt-5", description="Model to use")
|
|
21
|
+
model: str = Field(default="gpt-5.2", description="Model to use")
|
|
18
22
|
temperature: float = Field(default=0.3, ge=0, le=1, description="Temperature for generation")
|
|
19
23
|
max_tokens: int = Field(default=4096, description="Maximum tokens in response")
|
|
20
24
|
timeout: float = Field(default=60.0, description="Timeout in seconds")
|
|
@@ -35,12 +39,12 @@ class CopilotFeatureClient:
|
|
|
35
39
|
----------
|
|
36
40
|
config : CopilotConfig, optional
|
|
37
41
|
Configuration for the client
|
|
38
|
-
model : str, default='gpt-5'
|
|
42
|
+
model : str, default='gpt-5.2'
|
|
39
43
|
Model to use for generation
|
|
40
44
|
|
|
41
45
|
Examples
|
|
42
46
|
--------
|
|
43
|
-
>>> client = CopilotFeatureClient(model='gpt-5')
|
|
47
|
+
>>> client = CopilotFeatureClient(model='gpt-5.2')
|
|
44
48
|
>>> await client.start()
|
|
45
49
|
>>> suggestions = await client.suggest_features(
|
|
46
50
|
... column_info={'age': 'int', 'income': 'float'},
|
|
@@ -49,7 +53,7 @@ class CopilotFeatureClient:
|
|
|
49
53
|
>>> await client.stop()
|
|
50
54
|
"""
|
|
51
55
|
|
|
52
|
-
def __init__(self, config: Optional[CopilotConfig] = None, model: str = "gpt-5", **kwargs):
|
|
56
|
+
def __init__(self, config: Optional[CopilotConfig] = None, model: str = "gpt-5.2", **kwargs):
|
|
53
57
|
self.config = config or CopilotConfig(model=model, **kwargs)
|
|
54
58
|
self._client = None
|
|
55
59
|
self._session = None
|
|
@@ -82,13 +86,13 @@ class CopilotFeatureClient:
|
|
|
82
86
|
# Copilot SDK not installed - use mock mode
|
|
83
87
|
self._copilot_available = False
|
|
84
88
|
self._is_started = True
|
|
85
|
-
|
|
89
|
+
logger.warning("copilot-sdk not installed. Using mock LLM responses.")
|
|
86
90
|
|
|
87
91
|
except Exception as e:
|
|
88
92
|
# Copilot not available - use mock mode
|
|
89
93
|
self._copilot_available = False
|
|
90
94
|
self._is_started = True
|
|
91
|
-
|
|
95
|
+
logger.warning(f"Could not connect to Copilot: {e}. Using mock LLM responses.")
|
|
92
96
|
|
|
93
97
|
return self
|
|
94
98
|
|
|
@@ -469,7 +473,37 @@ result = df['col1'] / (df['col2'] + 1e-8)
|
|
|
469
473
|
local_vars = {"df": df, "np": np, "pd": pd}
|
|
470
474
|
exec(
|
|
471
475
|
code,
|
|
472
|
-
{
|
|
476
|
+
{
|
|
477
|
+
"__builtins__": {
|
|
478
|
+
"len": len,
|
|
479
|
+
"sum": sum,
|
|
480
|
+
"max": max,
|
|
481
|
+
"min": min,
|
|
482
|
+
"int": int,
|
|
483
|
+
"float": float,
|
|
484
|
+
"str": str,
|
|
485
|
+
"bool": bool,
|
|
486
|
+
"abs": abs,
|
|
487
|
+
"round": round,
|
|
488
|
+
"pow": pow,
|
|
489
|
+
"range": range,
|
|
490
|
+
"list": list,
|
|
491
|
+
"dict": dict,
|
|
492
|
+
"set": set,
|
|
493
|
+
"tuple": tuple,
|
|
494
|
+
"sorted": sorted,
|
|
495
|
+
"reversed": reversed,
|
|
496
|
+
"enumerate": enumerate,
|
|
497
|
+
"zip": zip,
|
|
498
|
+
"any": any,
|
|
499
|
+
"all": all,
|
|
500
|
+
"map": map,
|
|
501
|
+
"filter": filter,
|
|
502
|
+
"isinstance": isinstance,
|
|
503
|
+
"hasattr": hasattr,
|
|
504
|
+
"getattr": getattr,
|
|
505
|
+
}
|
|
506
|
+
},
|
|
473
507
|
local_vars,
|
|
474
508
|
)
|
|
475
509
|
|
|
@@ -489,33 +523,43 @@ class SyncCopilotFeatureClient:
|
|
|
489
523
|
|
|
490
524
|
def __init__(self, **kwargs):
|
|
491
525
|
self._async_client = CopilotFeatureClient(**kwargs)
|
|
492
|
-
self._loop = None
|
|
493
526
|
|
|
494
|
-
def
|
|
495
|
-
|
|
527
|
+
def _run_async(self, coro):
|
|
528
|
+
"""Run an async coroutine, handling nested event loops (e.g., Jupyter)."""
|
|
529
|
+
try:
|
|
530
|
+
# Check if we're in a running event loop (e.g., Jupyter)
|
|
531
|
+
loop = asyncio.get_running_loop()
|
|
532
|
+
# We're in a running loop - use nest_asyncio if available
|
|
496
533
|
try:
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
534
|
+
import nest_asyncio
|
|
535
|
+
|
|
536
|
+
nest_asyncio.apply()
|
|
537
|
+
return loop.run_until_complete(coro)
|
|
538
|
+
except ImportError:
|
|
539
|
+
# nest_asyncio not available, try alternative approach
|
|
540
|
+
import concurrent.futures
|
|
541
|
+
|
|
542
|
+
with concurrent.futures.ThreadPoolExecutor() as executor:
|
|
543
|
+
future = executor.submit(asyncio.run, coro)
|
|
544
|
+
return future.result()
|
|
545
|
+
except RuntimeError:
|
|
546
|
+
# No running event loop - safe to use asyncio.run
|
|
547
|
+
return asyncio.run(coro)
|
|
502
548
|
|
|
503
549
|
def start(self):
|
|
504
|
-
return self.
|
|
550
|
+
return self._run_async(self._async_client.start())
|
|
505
551
|
|
|
506
552
|
def stop(self):
|
|
507
|
-
return self.
|
|
553
|
+
return self._run_async(self._async_client.stop())
|
|
508
554
|
|
|
509
555
|
def suggest_features(self, **kwargs):
|
|
510
|
-
return self.
|
|
556
|
+
return self._run_async(self._async_client.suggest_features(**kwargs))
|
|
511
557
|
|
|
512
558
|
def explain_feature(self, **kwargs):
|
|
513
|
-
return self.
|
|
559
|
+
return self._run_async(self._async_client.explain_feature(**kwargs))
|
|
514
560
|
|
|
515
561
|
def generate_feature_code(self, **kwargs):
|
|
516
|
-
return self.
|
|
562
|
+
return self._run_async(self._async_client.generate_feature_code(**kwargs))
|
|
517
563
|
|
|
518
564
|
def validate_feature_code(self, code: str, sample_data=None):
|
|
519
|
-
return self.
|
|
520
|
-
self._async_client.validate_feature_code(code=code, sample_data=sample_data)
|
|
521
|
-
)
|
|
565
|
+
return self._run_async(self._async_client.validate_feature_code(code=code, sample_data=sample_data))
|
|
@@ -9,6 +9,9 @@ import pandas as pd
|
|
|
9
9
|
|
|
10
10
|
from featcopilot.core.feature import Feature, FeatureSet
|
|
11
11
|
from featcopilot.llm.copilot_client import SyncCopilotFeatureClient
|
|
12
|
+
from featcopilot.utils.logger import get_logger
|
|
13
|
+
|
|
14
|
+
logger = get_logger(__name__)
|
|
12
15
|
|
|
13
16
|
|
|
14
17
|
class FeatureExplainer:
|
|
@@ -20,7 +23,7 @@ class FeatureExplainer:
|
|
|
20
23
|
|
|
21
24
|
Parameters
|
|
22
25
|
----------
|
|
23
|
-
model : str, default='gpt-5'
|
|
26
|
+
model : str, default='gpt-5.2'
|
|
24
27
|
LLM model to use
|
|
25
28
|
|
|
26
29
|
Examples
|
|
@@ -29,7 +32,7 @@ class FeatureExplainer:
|
|
|
29
32
|
>>> explanations = explainer.explain_features(feature_set, task='predict churn')
|
|
30
33
|
"""
|
|
31
34
|
|
|
32
|
-
def __init__(self, model: str = "gpt-5", verbose: bool = False):
|
|
35
|
+
def __init__(self, model: str = "gpt-5.2", verbose: bool = False):
|
|
33
36
|
self.model = model
|
|
34
37
|
self.verbose = verbose
|
|
35
38
|
self._client: Optional[SyncCopilotFeatureClient] = None
|
|
@@ -115,7 +118,7 @@ class FeatureExplainer:
|
|
|
115
118
|
|
|
116
119
|
except Exception as e:
|
|
117
120
|
if self.verbose:
|
|
118
|
-
|
|
121
|
+
logger.error(f"Could not explain {feature.name}: {e}")
|
|
119
122
|
explanations[feature.name] = f"Feature based on: {', '.join(feature.source_columns)}"
|
|
120
123
|
|
|
121
124
|
return explanations
|