featcopilot 0.1.0-py3-none-any.whl → 0.2.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
featcopilot/__init__.py CHANGED
@@ -5,7 +5,9 @@ A unified feature engineering framework combining traditional approaches
 with novel LLM-powered capabilities via GitHub Copilot SDK.
 """
 
-__version__ = "0.1.0"
+from importlib.metadata import version
+
+__version__ = version("featcopilot")
 __author__ = "FeatCopilot Contributors"
 
 from featcopilot.core.base import BaseEngine, BaseSelector
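With this hunk the package version is no longer hard-coded: featcopilot/__init__.py resolves __version__ from the installed distribution's metadata at import time. One thing worth noting in review is that importlib.metadata.version raises PackageNotFoundError when the distribution is not installed (for example, when the package is imported straight from a source checkout). A minimal guarded variant, shown purely as an illustration and not present in the released code:

from importlib.metadata import PackageNotFoundError, version

try:
    __version__ = version("featcopilot")  # reads the installed distribution's metadata
except PackageNotFoundError:  # e.g. imported from a source tree that was never installed
    __version__ = "0.0.0+unknown"  # placeholder fallback; the released code has no such guard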
@@ -7,6 +7,10 @@ from typing import Any, Optional
 import numpy as np
 import pandas as pd
 
+from featcopilot.utils.logger import get_logger
+
+logger = get_logger(__name__)
+
 
 class FeatureType(Enum):
     """Types of features."""
@@ -220,5 +224,5 @@ class FeatureSet:
                 result[feature.name] = feature.compute(df)
             except Exception as e:
                 # Log warning but continue
-                print(f"Warning: Could not compute feature {feature.name}: {e}")
+                logger.warning(f"Could not compute feature {feature.name}: {e}")
         return result
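These hunks (apparently featcopilot/core/feature.py, judging by the FeatureType and FeatureSet definitions and the imports elsewhere in this diff) replace print with a logger obtained via featcopilot.utils.logger.get_logger, a pattern repeated in every module below. That utility module is new in 0.2.0 and its body does not appear in this diff, so the following is only a plausible sketch of such a helper, assuming a thin wrapper over the standard logging module; the real implementation may differ:

import logging


def get_logger(name: str) -> logging.Logger:
    """Return a module logger, attaching a stream handler only once."""
    logger = logging.getLogger(name)
    if not logger.handlers:
        handler = logging.StreamHandler()
        handler.setFormatter(logging.Formatter("%(asctime)s %(name)s %(levelname)s: %(message)s"))
        logger.addHandler(handler)
        logger.setLevel(logging.INFO)
    return logger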
@@ -11,6 +11,9 @@ from pydantic import Field
 
 from featcopilot.core.base import BaseEngine, EngineConfig
 from featcopilot.core.feature import FeatureSet
+from featcopilot.utils.logger import get_logger
+
+logger = get_logger(__name__)
 
 
 class RelationalEngineConfig(EngineConfig):
@@ -141,7 +144,7 @@ class RelationalEngine(BaseEngine):
         self._primary_columns = X.columns.tolist()
 
         if self.config.verbose:
-            print(f"RelationalEngine: {len(self._relationships)} relationships defined")
+            logger.info(f"RelationalEngine: {len(self._relationships)} relationships defined")
 
         self._is_fitted = True
         return self
@@ -191,7 +194,7 @@ class RelationalEngine(BaseEngine):
         self._feature_names = [c for c in result.columns if c not in X.columns]
 
         if self.config.verbose:
-            print(f"RelationalEngine: Generated {len(self._feature_names)} features")
+            logger.info(f"RelationalEngine: Generated {len(self._feature_names)} features")
 
         return result
 
@@ -12,6 +12,9 @@ from pydantic import Field
 
 from featcopilot.core.base import BaseEngine, EngineConfig
 from featcopilot.core.feature import Feature, FeatureOrigin, FeatureSet, FeatureType
+from featcopilot.utils.logger import get_logger
+
+logger = get_logger(__name__)
 
 
 class TabularEngineConfig(EngineConfig):
@@ -124,7 +127,7 @@ class TabularEngine(BaseEngine):
         ]
 
         if self.config.verbose:
-            print(f"TabularEngine: Found {len(self._numeric_columns)} numeric columns")
+            logger.info(f"TabularEngine: Found {len(self._numeric_columns)} numeric columns")
 
         # Plan features to generate
         self._plan_features(X)
@@ -207,7 +210,7 @@ class TabularEngine(BaseEngine):
             self._feature_set.add(feature)
 
         if self.config.verbose:
-            print(f"TabularEngine: Planned {len(self._feature_set)} features")
+            logger.info(f"TabularEngine: Planned {len(self._feature_set)} features")
 
     def transform(self, X: Union[pd.DataFrame, np.ndarray], **kwargs) -> pd.DataFrame:
         """
@@ -284,7 +287,7 @@ class TabularEngine(BaseEngine):
         self._feature_names = [c for c in result.columns if c not in X.columns]
 
         if self.config.verbose:
-            print(f"TabularEngine: Generated {len(self._feature_names)} features")
+            logger.info(f"TabularEngine: Generated {len(self._feature_names)} features")
 
         return result
 
@@ -11,6 +11,9 @@ from pydantic import Field
 
 from featcopilot.core.base import BaseEngine, EngineConfig
 from featcopilot.core.feature import FeatureSet
+from featcopilot.utils.logger import get_logger
+
+logger = get_logger(__name__)
 
 
 class TextEngineConfig(EngineConfig):
@@ -106,7 +109,7 @@ class TextEngine(BaseEngine):
         ]
 
         if self.config.verbose:
-            print(f"TextEngine: Found {len(self._text_columns)} text columns")
+            logger.info(f"TextEngine: Found {len(self._text_columns)} text columns")
 
         # Fit TF-IDF vectorizers if needed
         if "tfidf" in self.config.features:
@@ -135,7 +138,7 @@ class TextEngine(BaseEngine):
 
         except ImportError:
             if self.config.verbose:
-                print("TextEngine: sklearn not available for TF-IDF, skipping")
+                logger.warning("TextEngine: sklearn not available for TF-IDF, skipping")
 
     def transform(self, X: Union[pd.DataFrame, np.ndarray], **kwargs) -> pd.DataFrame:
         """
@@ -191,7 +194,7 @@ class TextEngine(BaseEngine):
         self._feature_names = [c for c in result.columns if c not in X.columns]
 
         if self.config.verbose:
-            print(f"TextEngine: Extracted {len(self._feature_names)} features")
+            logger.info(f"TextEngine: Extracted {len(self._feature_names)} features")
 
         return result
 
@@ -12,6 +12,9 @@ from pydantic import Field
 
 from featcopilot.core.base import BaseEngine, EngineConfig
 from featcopilot.core.feature import FeatureSet
+from featcopilot.utils.logger import get_logger
+
+logger = get_logger(__name__)
 
 
 class TimeSeriesEngineConfig(EngineConfig):
@@ -123,7 +126,7 @@ class TimeSeriesEngine(BaseEngine):
         self._time_columns = X.select_dtypes(include=[np.number]).columns.tolist()
 
         if self.config.verbose:
-            print(f"TimeSeriesEngine: Found {len(self._time_columns)} numeric columns")
+            logger.info(f"TimeSeriesEngine: Found {len(self._time_columns)} numeric columns")
 
         self._is_fitted = True
         return self
@@ -177,7 +180,7 @@ class TimeSeriesEngine(BaseEngine):
         self._feature_names = list(result.columns)
 
         if self.config.verbose:
-            print(f"TimeSeriesEngine: Extracted {len(self._feature_names)} features")
+            logger.info(f"TimeSeriesEngine: Extracted {len(self._feature_names)} features")
 
         return result
 
@@ -1,15 +1,18 @@
 """LLM-powered feature engineering module.
 
-Uses GitHub Copilot SDK for intelligent feature generation.
+Uses GitHub Copilot SDK or LiteLLM for intelligent feature generation.
 """
 
 from featcopilot.llm.code_generator import FeatureCodeGenerator
 from featcopilot.llm.copilot_client import CopilotFeatureClient
 from featcopilot.llm.explainer import FeatureExplainer
+from featcopilot.llm.litellm_client import LiteLLMFeatureClient, SyncLiteLLMFeatureClient
 from featcopilot.llm.semantic_engine import SemanticEngine
 
 __all__ = [
     "CopilotFeatureClient",
+    "LiteLLMFeatureClient",
+    "SyncLiteLLMFeatureClient",
     "SemanticEngine",
     "FeatureExplainer",
     "FeatureCodeGenerator",
@@ -10,6 +10,9 @@ import pandas as pd
 
 from featcopilot.core.feature import Feature, FeatureOrigin, FeatureType
 from featcopilot.llm.copilot_client import SyncCopilotFeatureClient
+from featcopilot.utils.logger import get_logger
+
+logger = get_logger(__name__)
 
 
 class FeatureCodeGenerator:
@@ -21,7 +24,7 @@ class FeatureCodeGenerator:
 
     Parameters
     ----------
-    model : str, default='gpt-5'
+    model : str, default='gpt-5.2'
         LLM model to use
     validate : bool, default=True
         Whether to validate generated code
@@ -35,7 +38,7 @@ class FeatureCodeGenerator:
     ... )
     """
 
-    def __init__(self, model: str = "gpt-5", validate: bool = True, verbose: bool = False):
+    def __init__(self, model: str = "gpt-5.2", validate: bool = True, verbose: bool = False):
         self.model = model
         self.validate = validate
         self.verbose = verbose
@@ -98,7 +101,7 @@ class FeatureCodeGenerator:
         )
         if not validation["valid"]:
             if self.verbose:
-                print(f"Code validation failed: {validation['error']}")
+                logger.warning(f"Code validation failed: {validation['error']}")
             # Try to fix common issues
             code = self._fix_common_issues(code, validation["error"])
 
@@ -144,7 +147,7 @@ class FeatureCodeGenerator:
                 features.append(feature)
             except Exception as e:
                 if self.verbose:
-                    print(f"Failed to generate feature for '{desc}': {e}")
+                    logger.error(f"Failed to generate feature for '{desc}': {e}")
 
         return features
 
@@ -10,11 +10,15 @@ from typing import Any, Optional
 
 from pydantic import BaseModel, Field
 
+from featcopilot.utils.logger import get_logger
+
+logger = get_logger(__name__)
+
 
 class CopilotConfig(BaseModel):
     """Configuration for Copilot client."""
 
-    model: str = Field(default="gpt-5", description="Model to use")
+    model: str = Field(default="gpt-5.2", description="Model to use")
     temperature: float = Field(default=0.3, ge=0, le=1, description="Temperature for generation")
     max_tokens: int = Field(default=4096, description="Maximum tokens in response")
     timeout: float = Field(default=60.0, description="Timeout in seconds")
@@ -35,12 +39,12 @@ class CopilotFeatureClient:
     ----------
     config : CopilotConfig, optional
         Configuration for the client
-    model : str, default='gpt-5'
+    model : str, default='gpt-5.2'
         Model to use for generation
 
     Examples
     --------
-    >>> client = CopilotFeatureClient(model='gpt-5')
+    >>> client = CopilotFeatureClient(model='gpt-5.2')
    >>> await client.start()
    >>> suggestions = await client.suggest_features(
    ...     column_info={'age': 'int', 'income': 'float'},
@@ -49,7 +53,7 @@ class CopilotFeatureClient:
     >>> await client.stop()
     """
 
-    def __init__(self, config: Optional[CopilotConfig] = None, model: str = "gpt-5", **kwargs):
+    def __init__(self, config: Optional[CopilotConfig] = None, model: str = "gpt-5.2", **kwargs):
         self.config = config or CopilotConfig(model=model, **kwargs)
         self._client = None
         self._session = None
@@ -82,13 +86,13 @@ class CopilotFeatureClient:
             # Copilot SDK not installed - use mock mode
             self._copilot_available = False
             self._is_started = True
-            print("Warning: copilot-sdk not installed. Using mock LLM responses.")
+            logger.warning("copilot-sdk not installed. Using mock LLM responses.")
 
         except Exception as e:
             # Copilot not available - use mock mode
             self._copilot_available = False
             self._is_started = True
-            print(f"Warning: Could not connect to Copilot: {e}. Using mock LLM responses.")
+            logger.warning(f"Could not connect to Copilot: {e}. Using mock LLM responses.")
 
         return self
 
@@ -469,7 +473,37 @@ result = df['col1'] / (df['col2'] + 1e-8)
         local_vars = {"df": df, "np": np, "pd": pd}
         exec(
             code,
-            {"__builtins__": {"len": len, "sum": sum, "max": max, "min": min}},
+            {
+                "__builtins__": {
+                    "len": len,
+                    "sum": sum,
+                    "max": max,
+                    "min": min,
+                    "int": int,
+                    "float": float,
+                    "str": str,
+                    "bool": bool,
+                    "abs": abs,
+                    "round": round,
+                    "pow": pow,
+                    "range": range,
+                    "list": list,
+                    "dict": dict,
+                    "set": set,
+                    "tuple": tuple,
+                    "sorted": sorted,
+                    "reversed": reversed,
+                    "enumerate": enumerate,
+                    "zip": zip,
+                    "any": any,
+                    "all": all,
+                    "map": map,
+                    "filter": filter,
+                    "isinstance": isinstance,
+                    "hasattr": hasattr,
+                    "getattr": getattr,
+                }
+            },
             local_vars,
         )
 
@@ -489,33 +523,43 @@ class SyncCopilotFeatureClient:
 
     def __init__(self, **kwargs):
         self._async_client = CopilotFeatureClient(**kwargs)
-        self._loop = None
 
-    def _get_loop(self):
-        if self._loop is None or self._loop.is_closed():
+    def _run_async(self, coro):
+        """Run an async coroutine, handling nested event loops (e.g., Jupyter)."""
+        try:
+            # Check if we're in a running event loop (e.g., Jupyter)
+            loop = asyncio.get_running_loop()
+            # We're in a running loop - use nest_asyncio if available
             try:
-                self._loop = asyncio.get_event_loop()
-            except RuntimeError:
-                self._loop = asyncio.new_event_loop()
-                asyncio.set_event_loop(self._loop)
-        return self._loop
+                import nest_asyncio
+
+                nest_asyncio.apply()
+                return loop.run_until_complete(coro)
+            except ImportError:
+                # nest_asyncio not available, try alternative approach
+                import concurrent.futures
+
+                with concurrent.futures.ThreadPoolExecutor() as executor:
+                    future = executor.submit(asyncio.run, coro)
+                    return future.result()
+        except RuntimeError:
+            # No running event loop - safe to use asyncio.run
+            return asyncio.run(coro)
 
     def start(self):
-        return self._get_loop().run_until_complete(self._async_client.start())
+        return self._run_async(self._async_client.start())
 
     def stop(self):
-        return self._get_loop().run_until_complete(self._async_client.stop())
+        return self._run_async(self._async_client.stop())
 
     def suggest_features(self, **kwargs):
-        return self._get_loop().run_until_complete(self._async_client.suggest_features(**kwargs))
+        return self._run_async(self._async_client.suggest_features(**kwargs))
 
     def explain_feature(self, **kwargs):
-        return self._get_loop().run_until_complete(self._async_client.explain_feature(**kwargs))
+        return self._run_async(self._async_client.explain_feature(**kwargs))
 
     def generate_feature_code(self, **kwargs):
-        return self._get_loop().run_until_complete(self._async_client.generate_feature_code(**kwargs))
+        return self._run_async(self._async_client.generate_feature_code(**kwargs))
 
     def validate_feature_code(self, code: str, sample_data=None):
-        return self._get_loop().run_until_complete(
-            self._async_client.validate_feature_code(code=code, sample_data=sample_data)
-        )
+        return self._run_async(self._async_client.validate_feature_code(code=code, sample_data=sample_data))
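The synchronous wrapper replaces the cached-event-loop approach (_get_loop) with _run_async, which picks a strategy per call: asyncio.run when no loop is running, nest_asyncio.apply plus run_until_complete when called from inside a live loop such as Jupyter, and asyncio.run on a ThreadPoolExecutor worker when nest_asyncio is not installed. A short usage sketch from plain synchronous code; the column_info example comes from the doctest earlier in this diff, while the task keyword is an assumption:

from featcopilot.llm.copilot_client import SyncCopilotFeatureClient

# In a plain script there is no running loop, so each call falls through to asyncio.run().
client = SyncCopilotFeatureClient(model="gpt-5.2")
client.start()
suggestions = client.suggest_features(
    column_info={"age": "int", "income": "float"},
    task="predict churn",
)
client.stop()

# In a notebook a loop is already running: nest_asyncio is applied if installed,
# otherwise the coroutine runs via asyncio.run() on a worker thread.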
@@ -9,6 +9,9 @@ import pandas as pd
 
 from featcopilot.core.feature import Feature, FeatureSet
 from featcopilot.llm.copilot_client import SyncCopilotFeatureClient
+from featcopilot.utils.logger import get_logger
+
+logger = get_logger(__name__)
 
 
 class FeatureExplainer:
@@ -20,7 +23,7 @@ class FeatureExplainer:
 
     Parameters
     ----------
-    model : str, default='gpt-5'
+    model : str, default='gpt-5.2'
         LLM model to use
 
     Examples
@@ -29,7 +32,7 @@ class FeatureExplainer:
     >>> explanations = explainer.explain_features(feature_set, task='predict churn')
     """
 
-    def __init__(self, model: str = "gpt-5", verbose: bool = False):
+    def __init__(self, model: str = "gpt-5.2", verbose: bool = False):
         self.model = model
         self.verbose = verbose
         self._client: Optional[SyncCopilotFeatureClient] = None
@@ -115,7 +118,7 @@ class FeatureExplainer:
 
         except Exception as e:
             if self.verbose:
-                print(f"Could not explain {feature.name}: {e}")
+                logger.error(f"Could not explain {feature.name}: {e}")
             explanations[feature.name] = f"Feature based on: {', '.join(feature.source_columns)}"
 
         return explanations