featcopilot 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,109 @@
1
+ """Parallel processing utilities."""
2
+
3
+ from typing import Any, Callable, Optional
4
+
5
+ import pandas as pd
6
+
7
+
8
def parallel_apply(
    func: Callable,
    data: pd.DataFrame,
    n_jobs: int = -1,
    batch_size: Optional[int] = None,
    verbose: bool = False,
) -> list[Any]:
    """
    Apply a function to every row of a DataFrame, in parallel when possible.

    Rows are produced with ``data.iterrows()``, so ``func`` receives each row
    as a ``pandas.Series``. When joblib cannot be imported the rows are
    processed sequentially in the calling process instead.

    Parameters
    ----------
    func : callable
        Function to apply to each row
    data : DataFrame
        Input data
    n_jobs : int, default=-1
        Number of parallel jobs (-1 for all CPUs)
    batch_size : int, optional
        Number of rows dispatched to a worker at once. When omitted,
        joblib's adaptive ``"auto"`` batching is used.
    verbose : bool, default=False
        Show progress

    Returns
    -------
    results : list
        Results from applying function to each row, in row order
    """
    # Keep the try body limited to the import: an ImportError raised by
    # ``func`` itself must propagate to the caller rather than being mistaken
    # for "joblib is missing" and triggering a second, sequential run.
    try:
        from joblib import Parallel, delayed
    except ImportError:
        # Fallback to sequential processing
        if verbose:
            print("joblib not available, using sequential processing")
        return [func(row) for _, row in data.iterrows()]

    if n_jobs == -1:
        import os

        # os.cpu_count() may return None; fall back to a single worker.
        n_jobs = os.cpu_count() or 1

    runner = Parallel(
        n_jobs=n_jobs,
        verbose=1 if verbose else 0,
        # Forward the caller's batch size; it was previously computed but
        # never handed to joblib, so the parameter had no effect.
        batch_size="auto" if batch_size is None else batch_size,
    )
    return runner(delayed(func)(row) for _, row in data.iterrows())
57
+
58
+
59
def parallel_transform(
    transformers: list[tuple], X: pd.DataFrame, n_jobs: int = -1, verbose: bool = False
) -> pd.DataFrame:
    """
    Apply multiple transformers to the same input and merge their outputs.

    Each transformer's ``transform(X)`` is run (through joblib when it is
    importable, otherwise sequentially) and the columns it produces that are
    not already present are appended to a copy of ``X``.

    Parameters
    ----------
    transformers : list
        List of (name, transformer) tuples. Only the transformer is used;
        it must expose ``transform(X)`` returning a DataFrame.
    X : DataFrame
        Input data (not modified)
    n_jobs : int, default=-1
        Number of parallel jobs
    verbose : bool, default=False
        Show progress

    Returns
    -------
    X_combined : DataFrame
        Copy of ``X`` extended with the new columns from every transformer.
        When several transformers emit the same column name, the one that
        appears first in ``transformers`` wins.
    """
    # Only the import is guarded: an ImportError raised inside a
    # transformer must surface to the caller instead of silently re-running
    # every transformer through the sequential path.
    try:
        from joblib import Parallel, delayed
    except ImportError:
        # Sequential fallback
        outputs = [transformer.transform(X) for _, transformer in transformers]
    else:
        outputs = Parallel(n_jobs=n_jobs, verbose=1 if verbose else 0)(
            delayed(transformer.transform)(X) for _, transformer in transformers
        )

    return _merge_new_columns(X, outputs)


def _merge_new_columns(base: pd.DataFrame, frames) -> pd.DataFrame:
    """Return a copy of ``base`` with each frame's not-yet-present columns appended."""
    combined = base.copy()
    for frame in frames:
        # Decide which columns are new before assigning any of them, so the
        # check is made against the result as it stood before this frame.
        new_cols = [c for c in frame.columns if c not in combined.columns]
        for col in new_cols:
            combined[col] = frame[col]
    return combined
@@ -0,0 +1,218 @@
1
+ Metadata-Version: 2.4
2
+ Name: featcopilot
3
+ Version: 0.1.0
4
+ Summary: Next-generation LLM-powered auto feature engineering framework with GitHub Copilot SDK
5
+ Author: FeatCopilot Contributors
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/thinkall/featcopilot
8
+ Project-URL: Documentation, https://github.com/thinkall/featcopilot#readme
9
+ Project-URL: Repository, https://github.com/thinkall/featcopilot
10
+ Keywords: machine-learning,feature-engineering,automl,llm,copilot,data-science
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: Intended Audience :: Science/Research
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.9
17
+ Classifier: Programming Language :: Python :: 3.10
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
21
+ Requires-Python: >=3.9
22
+ Description-Content-Type: text/markdown
23
+ Requires-Dist: numpy>=1.21.0
24
+ Requires-Dist: pandas>=1.3.0
25
+ Requires-Dist: scipy>=1.7.0
26
+ Requires-Dist: scikit-learn>=1.0.0
27
+ Requires-Dist: pydantic>=2.0.0
28
+ Requires-Dist: joblib>=1.1.0
29
+ Provides-Extra: llm
30
+ Requires-Dist: github-copilot-sdk>=0.1.0; extra == "llm"
31
+ Provides-Extra: timeseries
32
+ Requires-Dist: statsmodels>=0.13.0; extra == "timeseries"
33
+ Provides-Extra: full
34
+ Requires-Dist: github-copilot-sdk>=0.1.0; extra == "full"
35
+ Requires-Dist: statsmodels>=0.13.0; extra == "full"
36
+ Provides-Extra: dev
37
+ Requires-Dist: pytest>=7.0.0; extra == "dev"
38
+ Requires-Dist: pytest-asyncio>=0.21.0; extra == "dev"
39
+ Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
40
+ Requires-Dist: black>=23.0.0; extra == "dev"
41
+ Requires-Dist: ruff>=0.1.0; extra == "dev"
42
+ Requires-Dist: mypy>=1.0.0; extra == "dev"
43
+ Requires-Dist: pre-commit>=3.6.0; extra == "dev"
44
+
45
+ # FeatCopilot 🚀
46
+
47
+ **Next-Generation LLM-Powered Auto Feature Engineering with GitHub Copilot SDK**
48
+
49
+ FeatCopilot is a unified feature engineering framework that combines the best approaches from existing libraries (Featuretools, TSFresh, AutoFeat, OpenFE) with novel LLM-powered capabilities via GitHub Copilot SDK.
50
+
51
+ ## 📊 Benchmark Highlights
52
+
53
+ ### Tabular Engine (Fast Mode - <1s)
54
+
55
+ | Task Type | Average Improvement | Best Case |
56
+ |-----------|--------------------:|----------:|
57
+ | **Text Classification** | **+12.44%** | +49.02% (News Headlines) |
58
+ | Time Series | +1.51% | +12.12% (Retail Demand) |
59
+ | Classification | +0.54% | +4.35% |
60
+ | Regression | +0.65% | +5.57% |
61
+
62
+ ### LLM Engine (With Copilot - 30-60s)
63
+
64
+ | Task Type | Average Improvement | Best Case |
65
+ |-----------|--------------------:|----------:|
66
+ | **Regression** | **+7.79%** | +19.66% (Retail Demand) |
67
+ | Classification | +2.38% | +2.87% |
68
+
69
+ - ✅ **12/12 wins** on text classification (tabular mode)
70
+ - 🧠 **+19.66% max improvement** with LLM-powered features
71
+ - ⚡ **<1 second** (tabular) or **30-60s** (with LLM) processing time
72
+ - 📈 Largest gains with simple models (LogisticRegression, Ridge)
73
+
74
+ [View Full Benchmark Results](https://thinkall.github.io/featcopilot/user-guide/benchmarks/)
75
+
76
+ ## Key Features
77
+
78
+ - 🔧 **Multi-Engine Architecture**: Tabular, time series, relational, and text feature engines
79
+ - 🤖 **LLM-Powered Intelligence**: Semantic feature discovery, domain-aware generation, and code synthesis
80
+ - 📊 **Intelligent Selection**: Statistical testing, importance ranking, and redundancy elimination
81
+ - 🔌 **Scikit-learn Compatible**: Drop-in replacement for sklearn transformers
82
+ - 📝 **Interpretable**: Every feature comes with human-readable explanations
83
+
84
+ ## Installation
85
+
86
+ ```bash
87
+ # Basic installation
88
+ pip install featcopilot
89
+
90
+ # With LLM capabilities (requires GitHub Copilot)
91
+ pip install featcopilot[llm]
92
+
93
+ # Full installation
94
+ pip install featcopilot[full]
95
+ ```
96
+
97
+ ## Quick Start
98
+
99
+ ### Fast Mode (Tabular Only)
100
+
101
+ ```python
102
+ from featcopilot import AutoFeatureEngineer
103
+
104
+ # Sub-second feature engineering
105
+ engineer = AutoFeatureEngineer(
106
+ engines=['tabular'],
107
+ max_features=50
108
+ )
109
+
110
+ X_transformed = engineer.fit_transform(X, y) # <1 second
111
+ print(f"Features: {X.shape[1]} -> {X_transformed.shape[1]}")
112
+ ```
113
+
114
+ ### LLM Mode (With Copilot)
115
+
116
+ ```python
117
+ from featcopilot import AutoFeatureEngineer
118
+
119
+ # LLM-powered semantic features (+19.66% max improvement)
120
+ engineer = AutoFeatureEngineer(
121
+ engines=['tabular', 'llm'],
122
+ max_features=50
123
+ )
124
+
125
+ X_transformed = engineer.fit_transform(
126
+ X, y,
127
+ column_descriptions={
128
+ 'age': 'Customer age in years',
129
+ 'income': 'Annual household income in USD',
130
+ 'tenure': 'Months as customer',
131
+ },
132
+ task_description="Predict customer churn"
133
+ ) # 30-60 seconds
134
+
135
+ # Get LLM-generated explanations
136
+ for feature, explanation in engineer.explain_features().items():
137
+ print(f"{feature}: {explanation}")
138
+ ```
139
+
140
+ ## Engines
141
+
142
+ ### Tabular Engine
143
+ Generates polynomial features, interaction terms, and mathematical transformations.
144
+
145
+ ```python
146
+ from featcopilot.engines import TabularEngine
147
+
148
+ engine = TabularEngine(
149
+ polynomial_degree=2,
150
+ interaction_only=False,
151
+ include_transforms=['log', 'sqrt', 'square']
152
+ )
153
+ ```
154
+
155
+ ### Time Series Engine
156
+ Extracts statistical, frequency, and temporal features from time series data.
157
+
158
+ ```python
159
+ from featcopilot.engines import TimeSeriesEngine
160
+
161
+ engine = TimeSeriesEngine(
162
+ features=['mean', 'std', 'skew', 'autocorr', 'fft_coefficients']
163
+ )
164
+ ```
165
+
166
+ ### LLM Engine
167
+ Uses GitHub Copilot SDK for intelligent feature generation.
168
+
169
+ ```python
170
+ from featcopilot.llm import SemanticEngine
171
+
172
+ engine = SemanticEngine(
173
+ model='gpt-5',
174
+ max_suggestions=20,
175
+ validate_features=True
176
+ )
177
+ ```
178
+
179
+ ## Feature Selection
180
+
181
+ ```python
182
+ from featcopilot.selection import FeatureSelector
183
+
184
+ selector = FeatureSelector(
185
+ methods=['mutual_info', 'importance', 'correlation'],
186
+ max_features=30,
187
+ correlation_threshold=0.95
188
+ )
189
+
190
+ X_selected = selector.fit_transform(X, y)
191
+ ```
192
+
193
+ ## Comparison with Existing Libraries
194
+
195
+ | Feature | FeatCopilot | Featuretools | TSFresh | AutoFeat | OpenFE | CAAFE |
196
+ |---------|-------------|--------------|---------|----------|--------|-------|
197
+ | Tabular Features | ✅ | ❌ | ❌ | ✅ | ✅ | ✅ |
198
+ | Time Series | ✅ | ❌ | ✅ | ❌ | ❌ | ❌ |
199
+ | Relational | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ |
200
+ | LLM-Powered | ✅ | ❌ | ❌ | ❌ | ❌ | ✅ |
201
+ | Semantic Understanding | ✅ | ❌ | ❌ | ❌ | ❌ | ⚠️ |
202
+ | Code Generation | ✅ | ❌ | ❌ | ❌ | ❌ | ⚠️ |
203
+ | Sklearn Compatible | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ |
204
+ | Interpretable | ✅ | ⚠️ | ⚠️ | ⚠️ | ❌ | ✅ |
205
+
206
+ ## Documentation
207
+
208
+ 📖 **Full Documentation**: [https://thinkall.github.io/featcopilot/](https://thinkall.github.io/featcopilot/)
209
+
210
+ ## Requirements
211
+
212
+ - Python 3.9+
213
+ - NumPy, pandas, SciPy, scikit-learn, pydantic, joblib
214
+ - GitHub Copilot CLI (for LLM features)
215
+
216
+ ## License
217
+
218
+ MIT License
@@ -0,0 +1,29 @@
1
+ featcopilot/__init__.py,sha256=Ljn4cK5Emw_Rh8rg-T1EqDoyuWz_GXCAkiaZ_amz-cA,708
2
+ featcopilot/core/__init__.py,sha256=TX_AsPRsVDY1ZKO-ApK7qBFhbgngQbYNamhzYU9P3P8,338
3
+ featcopilot/core/base.py,sha256=lN1zfV6GHCNy2XSNV9OH2dXtvwrZZsu376kT_ExrZto,6090
4
+ featcopilot/core/feature.py,sha256=YGE-xDj7cRZDA50Oa5W-USXiEZ1NrX3WagZueRQiZbk,7361
5
+ featcopilot/core/registry.py,sha256=EK4lleLShhMkGRXI1evv6SsVO81rpLLkzJbjImHryJE,4030
6
+ featcopilot/engines/__init__.py,sha256=bXY5eiEQTZ9IxPY1fTESw4v1-nXFkegSs8p4VYGr7TQ,360
7
+ featcopilot/engines/relational.py,sha256=0zr8OhCRg-jvch1HRaxR6RgQFti3McDPvdco-onsUTc,8311
8
+ featcopilot/engines/tabular.py,sha256=M5cynbfNzkFilxV2rPG5kIcEwni-wc_HtjGNuGiSXGU,10781
9
+ featcopilot/engines/text.py,sha256=I9OGN0Rwf2SW1V9shaJDmmud1eymnx5xLdSmLfCzKhU,7396
10
+ featcopilot/engines/timeseries.py,sha256=76YE5QAD-yzXdajeP3wP3cfqtRSOZglz-E-OVSnvvs8,13553
11
+ featcopilot/llm/__init__.py,sha256=75MgMNw8cAggNOUAJV8ib1uV_UCTjA04tdYbqWNwumU,468
12
+ featcopilot/llm/code_generator.py,sha256=dAn5n3fp-sWj3DhQ9I-ZME5bYSXRVSe4Blx5P-W484Q,9715
13
+ featcopilot/llm/copilot_client.py,sha256=BLAENfNjkyOvgeVWQt-94mkTtxcdz5esk0ro6iaUCPM,17328
14
+ featcopilot/llm/explainer.py,sha256=fztidSmLX5dmbPRMcLmgXNv7AvDEJjXM5OzD6NFtQjQ,6142
15
+ featcopilot/llm/semantic_engine.py,sha256=U4CQ-OsWEr_c2Q8HF6yN7jXhKpTaSR6lft-lEacBzXo,12554
16
+ featcopilot/selection/__init__.py,sha256=pjoos64ym3CR7Hk75qq2dY0NN4OT0Sn3hiBSG_h44SE,406
17
+ featcopilot/selection/importance.py,sha256=GzSHTwKIeHJt42mzb1x8ClGFnqU9_2MxV1kOce9SODg,5481
18
+ featcopilot/selection/redundancy.py,sha256=bz47JQtEBKoz9eMIMB3YMOxH_1s7q1OsEUE_Ob3t0xg,5301
19
+ featcopilot/selection/statistical.py,sha256=POuWuMoyggJt1bCML10I6C7gKZoUP7Yc4nMsKBwECYs,6184
20
+ featcopilot/selection/unified.py,sha256=UPrTJ0svTjy6SnqQVeyiAGNjkgTRbXO_AzbucGZNUjc,6273
21
+ featcopilot/transformers/__init__.py,sha256=pHHSivxNVyC9AmKzdA8UJS0B-b9V1B4eI1QgTP0Z8uw,234
22
+ featcopilot/transformers/sklearn_compat.py,sha256=z5oA_y8YENQaGTTnR13u7CWhJpIhNmUoQP-DNVIgPeA,13536
23
+ featcopilot/utils/__init__.py,sha256=PH-fC1WYJYevcMVI4CCoXIUTJlb5lPXwKeKNj56vs_k,198
24
+ featcopilot/utils/cache.py,sha256=CYIVXQU9dpVdrLkemhjVAoCzXRMRzuQ-U36-q_8zDoc,6489
25
+ featcopilot/utils/parallel.py,sha256=MjLAYWYgoeu02IoWNWEZ82sCcRDi5bICJVcSq7dXmN4,2874
26
+ featcopilot-0.1.0.dist-info/METADATA,sha256=EI1MSGG2KkICOCRnLX1CI5-v91N40pmRS6oyAVgk1w4,6804
27
+ featcopilot-0.1.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
28
+ featcopilot-0.1.0.dist-info/top_level.txt,sha256=Lf6lB8VBDX71TBzSXW3qq44RO5hIUKQ6DMi_dL8G_Ak,12
29
+ featcopilot-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (80.10.2)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1 @@
1
+ featcopilot