featcopilot 0.1.0__tar.gz → 0.2.0__tar.gz

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (47)
  1. {featcopilot-0.1.0 → featcopilot-0.2.0}/PKG-INFO +32 -9
  2. {featcopilot-0.1.0 → featcopilot-0.2.0}/README.md +16 -8
  3. {featcopilot-0.1.0 → featcopilot-0.2.0}/featcopilot/__init__.py +3 -1
  4. {featcopilot-0.1.0 → featcopilot-0.2.0}/featcopilot/core/feature.py +5 -1
  5. {featcopilot-0.1.0 → featcopilot-0.2.0}/featcopilot/engines/relational.py +5 -2
  6. {featcopilot-0.1.0 → featcopilot-0.2.0}/featcopilot/engines/tabular.py +6 -3
  7. {featcopilot-0.1.0 → featcopilot-0.2.0}/featcopilot/engines/text.py +6 -3
  8. {featcopilot-0.1.0 → featcopilot-0.2.0}/featcopilot/engines/timeseries.py +5 -2
  9. {featcopilot-0.1.0 → featcopilot-0.2.0}/featcopilot/llm/__init__.py +4 -1
  10. {featcopilot-0.1.0 → featcopilot-0.2.0}/featcopilot/llm/code_generator.py +7 -4
  11. {featcopilot-0.1.0 → featcopilot-0.2.0}/featcopilot/llm/copilot_client.py +67 -23
  12. {featcopilot-0.1.0 → featcopilot-0.2.0}/featcopilot/llm/explainer.py +6 -3
  13. featcopilot-0.2.0/featcopilot/llm/litellm_client.py +595 -0
  14. {featcopilot-0.1.0 → featcopilot-0.2.0}/featcopilot/llm/semantic_engine.py +65 -16
  15. {featcopilot-0.1.0 → featcopilot-0.2.0}/featcopilot/selection/importance.py +5 -2
  16. {featcopilot-0.1.0 → featcopilot-0.2.0}/featcopilot/selection/redundancy.py +6 -3
  17. {featcopilot-0.1.0 → featcopilot-0.2.0}/featcopilot/selection/statistical.py +4 -1
  18. {featcopilot-0.1.0 → featcopilot-0.2.0}/featcopilot/selection/unified.py +4 -1
  19. featcopilot-0.2.0/featcopilot/stores/__init__.py +15 -0
  20. featcopilot-0.2.0/featcopilot/stores/base.py +166 -0
  21. featcopilot-0.2.0/featcopilot/stores/feast_store.py +541 -0
  22. {featcopilot-0.1.0 → featcopilot-0.2.0}/featcopilot/transformers/sklearn_compat.py +8 -5
  23. featcopilot-0.2.0/featcopilot/utils/__init__.py +23 -0
  24. featcopilot-0.2.0/featcopilot/utils/logger.py +47 -0
  25. featcopilot-0.2.0/featcopilot/utils/models.py +287 -0
  26. {featcopilot-0.1.0 → featcopilot-0.2.0}/featcopilot/utils/parallel.py +5 -1
  27. {featcopilot-0.1.0 → featcopilot-0.2.0}/featcopilot.egg-info/PKG-INFO +32 -9
  28. {featcopilot-0.1.0 → featcopilot-0.2.0}/featcopilot.egg-info/SOURCES.txt +9 -1
  29. {featcopilot-0.1.0 → featcopilot-0.2.0}/featcopilot.egg-info/requires.txt +18 -0
  30. {featcopilot-0.1.0 → featcopilot-0.2.0}/pyproject.toml +19 -1
  31. featcopilot-0.2.0/tests/test_litellm.py +249 -0
  32. featcopilot-0.2.0/tests/test_stores.py +261 -0
  33. featcopilot-0.1.0/featcopilot/utils/__init__.py +0 -9
  34. {featcopilot-0.1.0 → featcopilot-0.2.0}/featcopilot/core/__init__.py +0 -0
  35. {featcopilot-0.1.0 → featcopilot-0.2.0}/featcopilot/core/base.py +0 -0
  36. {featcopilot-0.1.0 → featcopilot-0.2.0}/featcopilot/core/registry.py +0 -0
  37. {featcopilot-0.1.0 → featcopilot-0.2.0}/featcopilot/engines/__init__.py +0 -0
  38. {featcopilot-0.1.0 → featcopilot-0.2.0}/featcopilot/selection/__init__.py +0 -0
  39. {featcopilot-0.1.0 → featcopilot-0.2.0}/featcopilot/transformers/__init__.py +0 -0
  40. {featcopilot-0.1.0 → featcopilot-0.2.0}/featcopilot/utils/cache.py +0 -0
  41. {featcopilot-0.1.0 → featcopilot-0.2.0}/featcopilot.egg-info/dependency_links.txt +0 -0
  42. {featcopilot-0.1.0 → featcopilot-0.2.0}/featcopilot.egg-info/top_level.txt +0 -0
  43. {featcopilot-0.1.0 → featcopilot-0.2.0}/setup.cfg +0 -0
  44. {featcopilot-0.1.0 → featcopilot-0.2.0}/tests/test_autofeat.py +0 -0
  45. {featcopilot-0.1.0 → featcopilot-0.2.0}/tests/test_core.py +0 -0
  46. {featcopilot-0.1.0 → featcopilot-0.2.0}/tests/test_engines.py +0 -0
  47. {featcopilot-0.1.0 → featcopilot-0.2.0}/tests/test_selection.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: featcopilot
3
- Version: 0.1.0
3
+ Version: 0.2.0
4
4
  Summary: Next-generation LLM-powered auto feature engineering framework with GitHub Copilot SDK
5
5
  Author: FeatCopilot Contributors
6
6
  License: MIT
@@ -28,11 +28,26 @@ Requires-Dist: pydantic>=2.0.0
28
28
  Requires-Dist: joblib>=1.1.0
29
29
  Provides-Extra: llm
30
30
  Requires-Dist: github-copilot-sdk>=0.1.0; extra == "llm"
31
+ Requires-Dist: nest_asyncio>=1.5.0; extra == "llm"
32
+ Provides-Extra: litellm
33
+ Requires-Dist: litellm>=1.0.0; extra == "litellm"
34
+ Requires-Dist: nest_asyncio>=1.5.0; extra == "litellm"
31
35
  Provides-Extra: timeseries
32
36
  Requires-Dist: statsmodels>=0.13.0; extra == "timeseries"
37
+ Provides-Extra: feast
38
+ Requires-Dist: feast>=0.30.0; extra == "feast"
33
39
  Provides-Extra: full
34
40
  Requires-Dist: github-copilot-sdk>=0.1.0; extra == "full"
41
+ Requires-Dist: litellm>=1.0.0; extra == "full"
35
42
  Requires-Dist: statsmodels>=0.13.0; extra == "full"
43
+ Requires-Dist: feast>=0.30.0; extra == "full"
44
+ Requires-Dist: nest_asyncio>=1.5.0; extra == "full"
45
+ Provides-Extra: benchmark
46
+ Requires-Dist: github-copilot-sdk>=0.1.0; extra == "benchmark"
47
+ Requires-Dist: statsmodels>=0.13.0; extra == "benchmark"
48
+ Requires-Dist: flaml[automl,blendsearch]>=2.0.0; extra == "benchmark"
49
+ Requires-Dist: autogluon.tabular>=1.0.0; extra == "benchmark"
50
+ Requires-Dist: h2o>=3.40.0; extra == "benchmark"
36
51
  Provides-Extra: dev
37
52
  Requires-Dist: pytest>=7.0.0; extra == "dev"
38
53
  Requires-Dist: pytest-asyncio>=0.21.0; extra == "dev"
@@ -44,9 +59,9 @@ Requires-Dist: pre-commit>=3.6.0; extra == "dev"
44
59
 
45
60
  # FeatCopilot 🚀
46
61
 
47
- **Next-Generation LLM-Powered Auto Feature Engineering with GitHub Copilot SDK**
62
+ **Next-Generation LLM-Powered Auto Feature Engineering Framework**
48
63
 
49
- FeatCopilot is a unified feature engineering framework that combines the best approaches from existing libraries (Featuretools, TSFresh, AutoFeat, OpenFE) with novel LLM-powered capabilities via GitHub Copilot SDK.
64
+ FeatCopilot automatically generates, selects, and explains predictive features using semantic understanding. It analyzes column meanings, applies domain-aware transformations, and provides human-readable explanations—turning raw data into ML-ready features in seconds.
50
65
 
51
66
  ## 📊 Benchmark Highlights
52
67
 
@@ -59,7 +74,7 @@ FeatCopilot is a unified feature engineering framework that combines the best ap
59
74
  | Classification | +0.54% | +4.35% |
60
75
  | Regression | +0.65% | +5.57% |
61
76
 
62
- ### LLM Engine (With Copilot - 30-60s)
77
+ ### LLM Engine (With LiteLLM - 30-60s)
63
78
 
64
79
  | Task Type | Average Improvement | Best Case |
65
80
  |-----------|--------------------:|----------:|
@@ -87,7 +102,7 @@ FeatCopilot is a unified feature engineering framework that combines the best ap
87
102
  # Basic installation
88
103
  pip install featcopilot
89
104
 
90
- # With LLM capabilities (requires GitHub Copilot)
105
+ # With LLM capabilities
91
106
  pip install featcopilot[llm]
92
107
 
93
108
  # Full installation
@@ -111,7 +126,7 @@ X_transformed = engineer.fit_transform(X, y) # <1 second
111
126
  print(f"Features: {X.shape[1]} -> {X_transformed.shape[1]}")
112
127
  ```
113
128
 
114
- ### LLM Mode (With Copilot)
129
+ ### LLM Mode (With LiteLLM)
115
130
 
116
131
  ```python
117
132
  from featcopilot import AutoFeatureEngineer
@@ -164,16 +179,24 @@ engine = TimeSeriesEngine(
164
179
  ```
165
180
 
166
181
  ### LLM Engine
167
- Uses GitHub Copilot SDK for intelligent feature generation.
182
+ Uses GitHub Copilot SDK (default) or LiteLLM (100+ providers) for intelligent feature generation.
168
183
 
169
184
  ```python
170
185
  from featcopilot.llm import SemanticEngine
171
186
 
187
+ # Default: GitHub Copilot SDK
172
188
  engine = SemanticEngine(
173
- model='gpt-5',
189
+ model='gpt-5.2',
174
190
  max_suggestions=20,
175
191
  validate_features=True
176
192
  )
193
+
194
+ # Alternative: LiteLLM backend
195
+ engine = SemanticEngine(
196
+ model='gpt-4o',
197
+ backend='litellm',
198
+ max_suggestions=20
199
+ )
177
200
  ```
178
201
 
179
202
  ## Feature Selection
@@ -211,7 +234,7 @@ X_selected = selector.fit_transform(X, y)
211
234
 
212
235
  - Python 3.9+
213
236
  - NumPy, Pandas, Scikit-learn
214
- - GitHub Copilot CLI (for LLM features)
237
+ - GitHub Copilot SDK (default) or LiteLLM (for 100+ LLM providers)
215
238
 
216
239
  ## License
217
240
 
@@ -1,8 +1,8 @@
1
1
  # FeatCopilot 🚀
2
2
 
3
- **Next-Generation LLM-Powered Auto Feature Engineering with GitHub Copilot SDK**
3
+ **Next-Generation LLM-Powered Auto Feature Engineering Framework**
4
4
 
5
- FeatCopilot is a unified feature engineering framework that combines the best approaches from existing libraries (Featuretools, TSFresh, AutoFeat, OpenFE) with novel LLM-powered capabilities via GitHub Copilot SDK.
5
+ FeatCopilot automatically generates, selects, and explains predictive features using semantic understanding. It analyzes column meanings, applies domain-aware transformations, and provides human-readable explanations—turning raw data into ML-ready features in seconds.
6
6
 
7
7
  ## 📊 Benchmark Highlights
8
8
 
@@ -15,7 +15,7 @@ FeatCopilot is a unified feature engineering framework that combines the best ap
15
15
  | Classification | +0.54% | +4.35% |
16
16
  | Regression | +0.65% | +5.57% |
17
17
 
18
- ### LLM Engine (With Copilot - 30-60s)
18
+ ### LLM Engine (With LiteLLM - 30-60s)
19
19
 
20
20
  | Task Type | Average Improvement | Best Case |
21
21
  |-----------|--------------------:|----------:|
@@ -43,7 +43,7 @@ FeatCopilot is a unified feature engineering framework that combines the best ap
43
43
  # Basic installation
44
44
  pip install featcopilot
45
45
 
46
- # With LLM capabilities (requires GitHub Copilot)
46
+ # With LLM capabilities
47
47
  pip install featcopilot[llm]
48
48
 
49
49
  # Full installation
@@ -67,7 +67,7 @@ X_transformed = engineer.fit_transform(X, y) # <1 second
67
67
  print(f"Features: {X.shape[1]} -> {X_transformed.shape[1]}")
68
68
  ```
69
69
 
70
- ### LLM Mode (With Copilot)
70
+ ### LLM Mode (With LiteLLM)
71
71
 
72
72
  ```python
73
73
  from featcopilot import AutoFeatureEngineer
@@ -120,16 +120,24 @@ engine = TimeSeriesEngine(
120
120
  ```
121
121
 
122
122
  ### LLM Engine
123
- Uses GitHub Copilot SDK for intelligent feature generation.
123
+ Uses GitHub Copilot SDK (default) or LiteLLM (100+ providers) for intelligent feature generation.
124
124
 
125
125
  ```python
126
126
  from featcopilot.llm import SemanticEngine
127
127
 
128
+ # Default: GitHub Copilot SDK
128
129
  engine = SemanticEngine(
129
- model='gpt-5',
130
+ model='gpt-5.2',
130
131
  max_suggestions=20,
131
132
  validate_features=True
132
133
  )
134
+
135
+ # Alternative: LiteLLM backend
136
+ engine = SemanticEngine(
137
+ model='gpt-4o',
138
+ backend='litellm',
139
+ max_suggestions=20
140
+ )
133
141
  ```
134
142
 
135
143
  ## Feature Selection
@@ -167,7 +175,7 @@ X_selected = selector.fit_transform(X, y)
167
175
 
168
176
  - Python 3.9+
169
177
  - NumPy, Pandas, Scikit-learn
170
- - GitHub Copilot CLI (for LLM features)
178
+ - GitHub Copilot SDK (default) or LiteLLM (for 100+ LLM providers)
171
179
 
172
180
  ## License
173
181
 
@@ -5,7 +5,9 @@ A unified feature engineering framework combining traditional approaches
5
5
  with novel LLM-powered capabilities via GitHub Copilot SDK.
6
6
  """
7
7
 
8
- __version__ = "0.1.0"
8
+ from importlib.metadata import version
9
+
10
+ __version__ = version("featcopilot")
9
11
  __author__ = "FeatCopilot Contributors"
10
12
 
11
13
  from featcopilot.core.base import BaseEngine, BaseSelector
@@ -7,6 +7,10 @@ from typing import Any, Optional
7
7
  import numpy as np
8
8
  import pandas as pd
9
9
 
10
+ from featcopilot.utils.logger import get_logger
11
+
12
+ logger = get_logger(__name__)
13
+
10
14
 
11
15
  class FeatureType(Enum):
12
16
  """Types of features."""
@@ -220,5 +224,5 @@ class FeatureSet:
220
224
  result[feature.name] = feature.compute(df)
221
225
  except Exception as e:
222
226
  # Log warning but continue
223
- print(f"Warning: Could not compute feature {feature.name}: {e}")
227
+ logger.warning(f"Could not compute feature {feature.name}: {e}")
224
228
  return result
@@ -11,6 +11,9 @@ from pydantic import Field
11
11
 
12
12
  from featcopilot.core.base import BaseEngine, EngineConfig
13
13
  from featcopilot.core.feature import FeatureSet
14
+ from featcopilot.utils.logger import get_logger
15
+
16
+ logger = get_logger(__name__)
14
17
 
15
18
 
16
19
  class RelationalEngineConfig(EngineConfig):
@@ -141,7 +144,7 @@ class RelationalEngine(BaseEngine):
141
144
  self._primary_columns = X.columns.tolist()
142
145
 
143
146
  if self.config.verbose:
144
- print(f"RelationalEngine: {len(self._relationships)} relationships defined")
147
+ logger.info(f"RelationalEngine: {len(self._relationships)} relationships defined")
145
148
 
146
149
  self._is_fitted = True
147
150
  return self
@@ -191,7 +194,7 @@ class RelationalEngine(BaseEngine):
191
194
  self._feature_names = [c for c in result.columns if c not in X.columns]
192
195
 
193
196
  if self.config.verbose:
194
- print(f"RelationalEngine: Generated {len(self._feature_names)} features")
197
+ logger.info(f"RelationalEngine: Generated {len(self._feature_names)} features")
195
198
 
196
199
  return result
197
200
 
@@ -12,6 +12,9 @@ from pydantic import Field
12
12
 
13
13
  from featcopilot.core.base import BaseEngine, EngineConfig
14
14
  from featcopilot.core.feature import Feature, FeatureOrigin, FeatureSet, FeatureType
15
+ from featcopilot.utils.logger import get_logger
16
+
17
+ logger = get_logger(__name__)
15
18
 
16
19
 
17
20
  class TabularEngineConfig(EngineConfig):
@@ -124,7 +127,7 @@ class TabularEngine(BaseEngine):
124
127
  ]
125
128
 
126
129
  if self.config.verbose:
127
- print(f"TabularEngine: Found {len(self._numeric_columns)} numeric columns")
130
+ logger.info(f"TabularEngine: Found {len(self._numeric_columns)} numeric columns")
128
131
 
129
132
  # Plan features to generate
130
133
  self._plan_features(X)
@@ -207,7 +210,7 @@ class TabularEngine(BaseEngine):
207
210
  self._feature_set.add(feature)
208
211
 
209
212
  if self.config.verbose:
210
- print(f"TabularEngine: Planned {len(self._feature_set)} features")
213
+ logger.info(f"TabularEngine: Planned {len(self._feature_set)} features")
211
214
 
212
215
  def transform(self, X: Union[pd.DataFrame, np.ndarray], **kwargs) -> pd.DataFrame:
213
216
  """
@@ -284,7 +287,7 @@ class TabularEngine(BaseEngine):
284
287
  self._feature_names = [c for c in result.columns if c not in X.columns]
285
288
 
286
289
  if self.config.verbose:
287
- print(f"TabularEngine: Generated {len(self._feature_names)} features")
290
+ logger.info(f"TabularEngine: Generated {len(self._feature_names)} features")
288
291
 
289
292
  return result
290
293
 
@@ -11,6 +11,9 @@ from pydantic import Field
11
11
 
12
12
  from featcopilot.core.base import BaseEngine, EngineConfig
13
13
  from featcopilot.core.feature import FeatureSet
14
+ from featcopilot.utils.logger import get_logger
15
+
16
+ logger = get_logger(__name__)
14
17
 
15
18
 
16
19
  class TextEngineConfig(EngineConfig):
@@ -106,7 +109,7 @@ class TextEngine(BaseEngine):
106
109
  ]
107
110
 
108
111
  if self.config.verbose:
109
- print(f"TextEngine: Found {len(self._text_columns)} text columns")
112
+ logger.info(f"TextEngine: Found {len(self._text_columns)} text columns")
110
113
 
111
114
  # Fit TF-IDF vectorizers if needed
112
115
  if "tfidf" in self.config.features:
@@ -135,7 +138,7 @@ class TextEngine(BaseEngine):
135
138
 
136
139
  except ImportError:
137
140
  if self.config.verbose:
138
- print("TextEngine: sklearn not available for TF-IDF, skipping")
141
+ logger.warning("TextEngine: sklearn not available for TF-IDF, skipping")
139
142
 
140
143
  def transform(self, X: Union[pd.DataFrame, np.ndarray], **kwargs) -> pd.DataFrame:
141
144
  """
@@ -191,7 +194,7 @@ class TextEngine(BaseEngine):
191
194
  self._feature_names = [c for c in result.columns if c not in X.columns]
192
195
 
193
196
  if self.config.verbose:
194
- print(f"TextEngine: Extracted {len(self._feature_names)} features")
197
+ logger.info(f"TextEngine: Extracted {len(self._feature_names)} features")
195
198
 
196
199
  return result
197
200
 
@@ -12,6 +12,9 @@ from pydantic import Field
12
12
 
13
13
  from featcopilot.core.base import BaseEngine, EngineConfig
14
14
  from featcopilot.core.feature import FeatureSet
15
+ from featcopilot.utils.logger import get_logger
16
+
17
+ logger = get_logger(__name__)
15
18
 
16
19
 
17
20
  class TimeSeriesEngineConfig(EngineConfig):
@@ -123,7 +126,7 @@ class TimeSeriesEngine(BaseEngine):
123
126
  self._time_columns = X.select_dtypes(include=[np.number]).columns.tolist()
124
127
 
125
128
  if self.config.verbose:
126
- print(f"TimeSeriesEngine: Found {len(self._time_columns)} numeric columns")
129
+ logger.info(f"TimeSeriesEngine: Found {len(self._time_columns)} numeric columns")
127
130
 
128
131
  self._is_fitted = True
129
132
  return self
@@ -177,7 +180,7 @@ class TimeSeriesEngine(BaseEngine):
177
180
  self._feature_names = list(result.columns)
178
181
 
179
182
  if self.config.verbose:
180
- print(f"TimeSeriesEngine: Extracted {len(self._feature_names)} features")
183
+ logger.info(f"TimeSeriesEngine: Extracted {len(self._feature_names)} features")
181
184
 
182
185
  return result
183
186
 
@@ -1,15 +1,18 @@
1
1
  """LLM-powered feature engineering module.
2
2
 
3
- Uses GitHub Copilot SDK for intelligent feature generation.
3
+ Uses GitHub Copilot SDK or LiteLLM for intelligent feature generation.
4
4
  """
5
5
 
6
6
  from featcopilot.llm.code_generator import FeatureCodeGenerator
7
7
  from featcopilot.llm.copilot_client import CopilotFeatureClient
8
8
  from featcopilot.llm.explainer import FeatureExplainer
9
+ from featcopilot.llm.litellm_client import LiteLLMFeatureClient, SyncLiteLLMFeatureClient
9
10
  from featcopilot.llm.semantic_engine import SemanticEngine
10
11
 
11
12
  __all__ = [
12
13
  "CopilotFeatureClient",
14
+ "LiteLLMFeatureClient",
15
+ "SyncLiteLLMFeatureClient",
13
16
  "SemanticEngine",
14
17
  "FeatureExplainer",
15
18
  "FeatureCodeGenerator",
@@ -10,6 +10,9 @@ import pandas as pd
10
10
 
11
11
  from featcopilot.core.feature import Feature, FeatureOrigin, FeatureType
12
12
  from featcopilot.llm.copilot_client import SyncCopilotFeatureClient
13
+ from featcopilot.utils.logger import get_logger
14
+
15
+ logger = get_logger(__name__)
13
16
 
14
17
 
15
18
  class FeatureCodeGenerator:
@@ -21,7 +24,7 @@ class FeatureCodeGenerator:
21
24
 
22
25
  Parameters
23
26
  ----------
24
- model : str, default='gpt-5'
27
+ model : str, default='gpt-5.2'
25
28
  LLM model to use
26
29
  validate : bool, default=True
27
30
  Whether to validate generated code
@@ -35,7 +38,7 @@ class FeatureCodeGenerator:
35
38
  ... )
36
39
  """
37
40
 
38
- def __init__(self, model: str = "gpt-5", validate: bool = True, verbose: bool = False):
41
+ def __init__(self, model: str = "gpt-5.2", validate: bool = True, verbose: bool = False):
39
42
  self.model = model
40
43
  self.validate = validate
41
44
  self.verbose = verbose
@@ -98,7 +101,7 @@ class FeatureCodeGenerator:
98
101
  )
99
102
  if not validation["valid"]:
100
103
  if self.verbose:
101
- print(f"Code validation failed: {validation['error']}")
104
+ logger.warning(f"Code validation failed: {validation['error']}")
102
105
  # Try to fix common issues
103
106
  code = self._fix_common_issues(code, validation["error"])
104
107
 
@@ -144,7 +147,7 @@ class FeatureCodeGenerator:
144
147
  features.append(feature)
145
148
  except Exception as e:
146
149
  if self.verbose:
147
- print(f"Failed to generate feature for '{desc}': {e}")
150
+ logger.error(f"Failed to generate feature for '{desc}': {e}")
148
151
 
149
152
  return features
150
153
 
@@ -10,11 +10,15 @@ from typing import Any, Optional
10
10
 
11
11
  from pydantic import BaseModel, Field
12
12
 
13
+ from featcopilot.utils.logger import get_logger
14
+
15
+ logger = get_logger(__name__)
16
+
13
17
 
14
18
  class CopilotConfig(BaseModel):
15
19
  """Configuration for Copilot client."""
16
20
 
17
- model: str = Field(default="gpt-5", description="Model to use")
21
+ model: str = Field(default="gpt-5.2", description="Model to use")
18
22
  temperature: float = Field(default=0.3, ge=0, le=1, description="Temperature for generation")
19
23
  max_tokens: int = Field(default=4096, description="Maximum tokens in response")
20
24
  timeout: float = Field(default=60.0, description="Timeout in seconds")
@@ -35,12 +39,12 @@ class CopilotFeatureClient:
35
39
  ----------
36
40
  config : CopilotConfig, optional
37
41
  Configuration for the client
38
- model : str, default='gpt-5'
42
+ model : str, default='gpt-5.2'
39
43
  Model to use for generation
40
44
 
41
45
  Examples
42
46
  --------
43
- >>> client = CopilotFeatureClient(model='gpt-5')
47
+ >>> client = CopilotFeatureClient(model='gpt-5.2')
44
48
  >>> await client.start()
45
49
  >>> suggestions = await client.suggest_features(
46
50
  ... column_info={'age': 'int', 'income': 'float'},
@@ -49,7 +53,7 @@ class CopilotFeatureClient:
49
53
  >>> await client.stop()
50
54
  """
51
55
 
52
- def __init__(self, config: Optional[CopilotConfig] = None, model: str = "gpt-5", **kwargs):
56
+ def __init__(self, config: Optional[CopilotConfig] = None, model: str = "gpt-5.2", **kwargs):
53
57
  self.config = config or CopilotConfig(model=model, **kwargs)
54
58
  self._client = None
55
59
  self._session = None
@@ -82,13 +86,13 @@ class CopilotFeatureClient:
82
86
  # Copilot SDK not installed - use mock mode
83
87
  self._copilot_available = False
84
88
  self._is_started = True
85
- print("Warning: copilot-sdk not installed. Using mock LLM responses.")
89
+ logger.warning("copilot-sdk not installed. Using mock LLM responses.")
86
90
 
87
91
  except Exception as e:
88
92
  # Copilot not available - use mock mode
89
93
  self._copilot_available = False
90
94
  self._is_started = True
91
- print(f"Warning: Could not connect to Copilot: {e}. Using mock LLM responses.")
95
+ logger.warning(f"Could not connect to Copilot: {e}. Using mock LLM responses.")
92
96
 
93
97
  return self
94
98
 
@@ -469,7 +473,37 @@ result = df['col1'] / (df['col2'] + 1e-8)
469
473
  local_vars = {"df": df, "np": np, "pd": pd}
470
474
  exec(
471
475
  code,
472
- {"__builtins__": {"len": len, "sum": sum, "max": max, "min": min}},
476
+ {
477
+ "__builtins__": {
478
+ "len": len,
479
+ "sum": sum,
480
+ "max": max,
481
+ "min": min,
482
+ "int": int,
483
+ "float": float,
484
+ "str": str,
485
+ "bool": bool,
486
+ "abs": abs,
487
+ "round": round,
488
+ "pow": pow,
489
+ "range": range,
490
+ "list": list,
491
+ "dict": dict,
492
+ "set": set,
493
+ "tuple": tuple,
494
+ "sorted": sorted,
495
+ "reversed": reversed,
496
+ "enumerate": enumerate,
497
+ "zip": zip,
498
+ "any": any,
499
+ "all": all,
500
+ "map": map,
501
+ "filter": filter,
502
+ "isinstance": isinstance,
503
+ "hasattr": hasattr,
504
+ "getattr": getattr,
505
+ }
506
+ },
473
507
  local_vars,
474
508
  )
475
509
 
@@ -489,33 +523,43 @@ class SyncCopilotFeatureClient:
489
523
 
490
524
  def __init__(self, **kwargs):
491
525
  self._async_client = CopilotFeatureClient(**kwargs)
492
- self._loop = None
493
526
 
494
- def _get_loop(self):
495
- if self._loop is None or self._loop.is_closed():
527
+ def _run_async(self, coro):
528
+ """Run an async coroutine, handling nested event loops (e.g., Jupyter)."""
529
+ try:
530
+ # Check if we're in a running event loop (e.g., Jupyter)
531
+ loop = asyncio.get_running_loop()
532
+ # We're in a running loop - use nest_asyncio if available
496
533
  try:
497
- self._loop = asyncio.get_event_loop()
498
- except RuntimeError:
499
- self._loop = asyncio.new_event_loop()
500
- asyncio.set_event_loop(self._loop)
501
- return self._loop
534
+ import nest_asyncio
535
+
536
+ nest_asyncio.apply()
537
+ return loop.run_until_complete(coro)
538
+ except ImportError:
539
+ # nest_asyncio not available, try alternative approach
540
+ import concurrent.futures
541
+
542
+ with concurrent.futures.ThreadPoolExecutor() as executor:
543
+ future = executor.submit(asyncio.run, coro)
544
+ return future.result()
545
+ except RuntimeError:
546
+ # No running event loop - safe to use asyncio.run
547
+ return asyncio.run(coro)
502
548
 
503
549
  def start(self):
504
- return self._get_loop().run_until_complete(self._async_client.start())
550
+ return self._run_async(self._async_client.start())
505
551
 
506
552
  def stop(self):
507
- return self._get_loop().run_until_complete(self._async_client.stop())
553
+ return self._run_async(self._async_client.stop())
508
554
 
509
555
  def suggest_features(self, **kwargs):
510
- return self._get_loop().run_until_complete(self._async_client.suggest_features(**kwargs))
556
+ return self._run_async(self._async_client.suggest_features(**kwargs))
511
557
 
512
558
  def explain_feature(self, **kwargs):
513
- return self._get_loop().run_until_complete(self._async_client.explain_feature(**kwargs))
559
+ return self._run_async(self._async_client.explain_feature(**kwargs))
514
560
 
515
561
  def generate_feature_code(self, **kwargs):
516
- return self._get_loop().run_until_complete(self._async_client.generate_feature_code(**kwargs))
562
+ return self._run_async(self._async_client.generate_feature_code(**kwargs))
517
563
 
518
564
  def validate_feature_code(self, code: str, sample_data=None):
519
- return self._get_loop().run_until_complete(
520
- self._async_client.validate_feature_code(code=code, sample_data=sample_data)
521
- )
565
+ return self._run_async(self._async_client.validate_feature_code(code=code, sample_data=sample_data))
@@ -9,6 +9,9 @@ import pandas as pd
9
9
 
10
10
  from featcopilot.core.feature import Feature, FeatureSet
11
11
  from featcopilot.llm.copilot_client import SyncCopilotFeatureClient
12
+ from featcopilot.utils.logger import get_logger
13
+
14
+ logger = get_logger(__name__)
12
15
 
13
16
 
14
17
  class FeatureExplainer:
@@ -20,7 +23,7 @@ class FeatureExplainer:
20
23
 
21
24
  Parameters
22
25
  ----------
23
- model : str, default='gpt-5'
26
+ model : str, default='gpt-5.2'
24
27
  LLM model to use
25
28
 
26
29
  Examples
@@ -29,7 +32,7 @@ class FeatureExplainer:
29
32
  >>> explanations = explainer.explain_features(feature_set, task='predict churn')
30
33
  """
31
34
 
32
- def __init__(self, model: str = "gpt-5", verbose: bool = False):
35
+ def __init__(self, model: str = "gpt-5.2", verbose: bool = False):
33
36
  self.model = model
34
37
  self.verbose = verbose
35
38
  self._client: Optional[SyncCopilotFeatureClient] = None
@@ -115,7 +118,7 @@ class FeatureExplainer:
115
118
 
116
119
  except Exception as e:
117
120
  if self.verbose:
118
- print(f"Could not explain {feature.name}: {e}")
121
+ logger.error(f"Could not explain {feature.name}: {e}")
119
122
  explanations[feature.name] = f"Feature based on: {', '.join(feature.source_columns)}"
120
123
 
121
124
  return explanations