myffe 3.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- myffe-3.3.0/LICENSE +21 -0
- myffe-3.3.0/PKG-INFO +262 -0
- myffe-3.3.0/README.md +221 -0
- myffe-3.3.0/myffe/FFE/__init__.py +88 -0
- myffe-3.3.0/myffe/__init__.py +198 -0
- myffe-3.3.0/myffe/core/FFE_config.py +349 -0
- myffe-3.3.0/myffe/core/FFE_console.py +659 -0
- myffe-3.3.0/myffe/core/__init__.py +2 -0
- myffe-3.3.0/myffe/core/master_judge.py +259 -0
- myffe-3.3.0/myffe/docs/param_structure.json +101 -0
- myffe-3.3.0/myffe/docs/param_structure_tree.txt +118 -0
- myffe-3.3.0/myffe/docs/predefined_routes.json +662 -0
- myffe-3.3.0/myffe/docs/predefined_routes_tree.txt +991 -0
- myffe-3.3.0/myffe/preprocessing/__init__.py +21 -0
- myffe-3.3.0/myffe/preprocessing/nan_preprocessing.py +844 -0
- myffe-3.3.0/myffe/preprocessing/normalized_preprocessing.py +166 -0
- myffe-3.3.0/myffe/preprocessing/outlier_preprocessing.py +923 -0
- myffe-3.3.0/myffe/preprocessing/relative_preprocessing.py +967 -0
- myffe-3.3.0/myffe/preprocessing/sample_preprocessing.py +791 -0
- myffe-3.3.0/myffe/preprocessing/specify_preprocessing.py +373 -0
- myffe-3.3.0/myffe/preprocessing/standard_preprocessing.py +399 -0
- myffe-3.3.0/myffe/preprocessing/str_preprocessing.py +1665 -0
- myffe-3.3.0/myffe/preprocessing/time_preprocessing.py +581 -0
- myffe-3.3.0/myffe/tools/Loggerv1_1.py +298 -0
- myffe-3.3.0/myffe/tools/__init__.py +8 -0
- myffe-3.3.0/myffe/tools/color_printer.py +71 -0
- myffe-3.3.0/myffe/tools/data_description.py +238 -0
- myffe-3.3.0/myffe/tools/data_inspector.py +58 -0
- myffe-3.3.0/myffe/tools/evaluation_decorator.py +57 -0
- myffe-3.3.0/myffe/tools/len_match.py +8 -0
- myffe-3.3.0/myffe/tools/param_utils.py +216 -0
- myffe-3.3.0/myffe/tools/system_validator.py +189 -0
- myffe-3.3.0/myffe/tools/unified_config_manager.py +218 -0
- myffe-3.3.0/myffe/tools/visualize_route.py +171 -0
- myffe-3.3.0/myffe/tools//344/272/244/344/272/222/345/274/217.py +177 -0
- myffe-3.3.0/myffe.egg-info/PKG-INFO +262 -0
- myffe-3.3.0/myffe.egg-info/SOURCES.txt +40 -0
- myffe-3.3.0/myffe.egg-info/dependency_links.txt +1 -0
- myffe-3.3.0/myffe.egg-info/requires.txt +4 -0
- myffe-3.3.0/myffe.egg-info/top_level.txt +1 -0
- myffe-3.3.0/setup.cfg +4 -0
- myffe-3.3.0/setup.py +51 -0
myffe-3.3.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 myffe
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
myffe-3.3.0/PKG-INFO
ADDED
|
@@ -0,0 +1,262 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: myffe
|
|
3
|
+
Version: 3.3.0
|
|
4
|
+
Summary: Feature Engineering Framework - A comprehensive tool for data preprocessing and feature engineering
|
|
5
|
+
Home-page: https://github.com/yourusername/myffe
|
|
6
|
+
Author: Your Name
|
|
7
|
+
Author-email: your.email@example.com
|
|
8
|
+
Keywords: feature engineering,machine learning,data preprocessing,data cleaning,data science,python
|
|
9
|
+
Classifier: Programming Language :: Python :: 3
|
|
10
|
+
Classifier: Programming Language :: Python :: 3.7
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
17
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
18
|
+
Classifier: Operating System :: OS Independent
|
|
19
|
+
Classifier: Intended Audience :: Developers
|
|
20
|
+
Classifier: Intended Audience :: Science/Research
|
|
21
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
22
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
23
|
+
Requires-Python: >=3.7
|
|
24
|
+
Description-Content-Type: text/markdown
|
|
25
|
+
License-File: LICENSE
|
|
26
|
+
Requires-Dist: pandas>=1.3.0
|
|
27
|
+
Requires-Dist: numpy>=1.20.0
|
|
28
|
+
Requires-Dist: scikit-learn>=0.24.0
|
|
29
|
+
Requires-Dist: scipy>=1.6.0
|
|
30
|
+
Dynamic: author
|
|
31
|
+
Dynamic: author-email
|
|
32
|
+
Dynamic: classifier
|
|
33
|
+
Dynamic: description
|
|
34
|
+
Dynamic: description-content-type
|
|
35
|
+
Dynamic: home-page
|
|
36
|
+
Dynamic: keywords
|
|
37
|
+
Dynamic: license-file
|
|
38
|
+
Dynamic: requires-dist
|
|
39
|
+
Dynamic: requires-python
|
|
40
|
+
Dynamic: summary
|
|
41
|
+
|
|
42
|
+
# myffe - Feature Engineering Framework
|
|
43
|
+
|
|
44
|
+
myffe 是一个工业级的特征工程框架,提供完整的特征工程工具链,支持数据清洗、特征提取、样本处理等功能。
|
|
45
|
+
|
|
46
|
+
## 特性
|
|
47
|
+
|
|
48
|
+
- **模块化设计**:易于扩展和定制
|
|
49
|
+
- **多种预处理方法**:支持缺失值处理、异常值处理、标准化、归一化等
|
|
50
|
+
- **预定义路线**:提供 20+ 种预定义的特征工程路线
|
|
51
|
+
- **自动参数推荐**:智能推荐最佳预处理参数
|
|
52
|
+
- **工业级封装**:外部接口简洁易用
|
|
53
|
+
- **纯 Python 实现**:无需编译,跨平台兼容
|
|
54
|
+
|
|
55
|
+
## 安装
|
|
56
|
+
|
|
57
|
+
```bash
|
|
58
|
+
pip install myffe
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
## 快速开始
|
|
62
|
+
|
|
63
|
+
### 简单使用
|
|
64
|
+
|
|
65
|
+
```python
|
|
66
|
+
import pandas as pd
|
|
67
|
+
from myffe import process_data
|
|
68
|
+
|
|
69
|
+
# 读取数据
|
|
70
|
+
data = pd.read_csv('your_data.csv')
|
|
71
|
+
|
|
72
|
+
# 使用预定义路线 1(基础路线)处理数据
|
|
73
|
+
processed_data = process_data(data, data_label='label', route_number=1)
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
### 高级使用
|
|
77
|
+
|
|
78
|
+
```python
|
|
79
|
+
from myffe import FFE
|
|
80
|
+
|
|
81
|
+
# 创建 FFE 实例
|
|
82
|
+
ffe = FFE()
|
|
83
|
+
|
|
84
|
+
# 创建流水线
|
|
85
|
+
console, tools = ffe.create_pipeline('label', route_number=3)
|
|
86
|
+
|
|
87
|
+
# 处理数据
|
|
88
|
+
console(tools, data, save_path='processed_data.csv')
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
### 自动推荐路线
|
|
92
|
+
|
|
93
|
+
```python
|
|
94
|
+
from myffe import FFE
|
|
95
|
+
|
|
96
|
+
ffe = FFE()
|
|
97
|
+
|
|
98
|
+
# 自动检测数据并推荐路线
|
|
99
|
+
recommended_route = ffe.auto_detect_route(data)
|
|
100
|
+
|
|
101
|
+
# 使用推荐的路线处理数据
|
|
102
|
+
console, tools = ffe.create_pipeline('label', route=recommended_route)
|
|
103
|
+
console(tools, data, save_path='processed_data.csv')
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
## 预定义路线
|
|
107
|
+
|
|
108
|
+
myffe 提供 20+ 种预定义路线,适用于不同场景:
|
|
109
|
+
|
|
110
|
+
1. **基础路线** - 适合大多数数据集
|
|
111
|
+
2. **股票数据路线** - 专为股票数据优化
|
|
112
|
+
3. **文本数据路线** - 包含文本处理
|
|
113
|
+
4. **时间序列路线** - 强化时间特征提取
|
|
114
|
+
5. **完整路线** - 包含所有预处理步骤
|
|
115
|
+
6. **轻量级路线** - 快速处理小数据集
|
|
116
|
+
7. **异常检测路线** - 专注于异常值处理
|
|
117
|
+
8. **特征增强路线** - 强化特征工程
|
|
118
|
+
9. **分类数据路线** - 适合分类任务
|
|
119
|
+
10. **回归数据路线** - 适合回归任务
|
|
120
|
+
11. **中文文本路线** - 专为中文文本数据优化
|
|
121
|
+
12. **不平衡数据路线** - 处理类别不平衡的数据集
|
|
122
|
+
13. **高维数据路线** - 处理特征维度较高的数据集
|
|
123
|
+
14. **金融数据路线** - 专为金融数据优化
|
|
124
|
+
15. **医疗数据路线** - 适合医疗数据集的处理
|
|
125
|
+
16. **电商数据路线** - 专为电商数据优化
|
|
126
|
+
17. **社交媒体数据路线** - 处理社交媒体数据
|
|
127
|
+
18. **传感器数据路线** - 处理传感器数据
|
|
128
|
+
19. **多模态数据路线** - 处理包含多种类型数据的数据集
|
|
129
|
+
20. **实时数据路线** - 适合实时数据流处理
|
|
130
|
+
|
|
131
|
+
查看可用路线:
|
|
132
|
+
|
|
133
|
+
```python
|
|
134
|
+
from myffe import show_available_routes
|
|
135
|
+
show_available_routes()
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
## 核心模块
|
|
139
|
+
|
|
140
|
+
### 1. 数据预处理 (preprocessing)
|
|
141
|
+
|
|
142
|
+
- `nan_preprocessing` - 缺失值处理
|
|
143
|
+
- `outlier_preprocessing` - 异常值处理
|
|
144
|
+
- `str_preprocessing` - 字符串处理
|
|
145
|
+
- `time_preprocessing` - 时间特征提取
|
|
146
|
+
- `standard_preprocessing` - 数据标准化
|
|
147
|
+
- `normalized_preprocessing` - 数据归一化
|
|
148
|
+
- `sample_preprocessing` - 样本处理
|
|
149
|
+
- `relative_preprocessing` - 相对特征处理
|
|
150
|
+
- `specify_preprocessing` - 指定特征处理
|
|
151
|
+
|
|
152
|
+
### 2. 路线配置 (route)
|
|
153
|
+
|
|
154
|
+
- `predefined_routes` - 预定义路线
|
|
155
|
+
- `auto_route` - 自动路线推荐
|
|
156
|
+
- `route_config` - 路线配置
|
|
157
|
+
|
|
158
|
+
### 3. 工具 (tools)
|
|
159
|
+
|
|
160
|
+
- `data_description` - 数据描述
|
|
161
|
+
- `data_inspector` - 数据检查
|
|
162
|
+
- `visualize_route` - 路线可视化
|
|
163
|
+
- `logger` - 日志系统
|
|
164
|
+
- `color_printer` - 彩色打印
|
|
165
|
+
|
|
166
|
+
## 核心接口
|
|
167
|
+
|
|
168
|
+
- `FFE` - 特征工程框架主类
|
|
169
|
+
- `FFE_config` - 配置类
|
|
170
|
+
- `FFE_console` - 控制台类
|
|
171
|
+
- `process_data` - 数据处理函数
|
|
172
|
+
- `get_recommended_route` - 获取推荐路线
|
|
173
|
+
- `show_available_routes` - 显示可用路线
|
|
174
|
+
- `get_route_params` - 获取路线参数
|
|
175
|
+
- `describe_data` - 描述数据
|
|
176
|
+
- `evaluate_dataset` - 评估数据集
|
|
177
|
+
- `visualize_route` - 可视化路线
|
|
178
|
+
|
|
179
|
+
## 使用示例
|
|
180
|
+
|
|
181
|
+
### 示例 1:基础数据处理
|
|
182
|
+
|
|
183
|
+
```python
|
|
184
|
+
import pandas as pd
|
|
185
|
+
from myffe import process_data
|
|
186
|
+
|
|
187
|
+
# 创建示例数据
|
|
188
|
+
data = pd.DataFrame({
|
|
189
|
+
'feature1': [1.0, 2.0, None, 4.0, 5.0],
|
|
190
|
+
'feature2': ['A', 'B', 'A', None, 'C'],
|
|
191
|
+
'label': [0, 1, 0, 1, 0]
|
|
192
|
+
})
|
|
193
|
+
|
|
194
|
+
# 处理数据
|
|
195
|
+
processed = process_data(data, data_label='label', route_number=1)
|
|
196
|
+
print(processed.head())
|
|
197
|
+
```
|
|
198
|
+
|
|
199
|
+
### 示例 2:自定义路线
|
|
200
|
+
|
|
201
|
+
```python
|
|
202
|
+
from myffe import FFE, get_route_params
|
|
203
|
+
|
|
204
|
+
# 获取路线 3 的参数
|
|
205
|
+
params = get_route_params(3)
|
|
206
|
+
|
|
207
|
+
# 创建 FFE 实例
|
|
208
|
+
ffe = FFE()
|
|
209
|
+
|
|
210
|
+
# 创建流水线
|
|
211
|
+
console, tools = ffe.create_pipeline('label', route=params)
|
|
212
|
+
|
|
213
|
+
# 处理数据
|
|
214
|
+
console(tools, data, save_path='output.csv')
|
|
215
|
+
```
|
|
216
|
+
|
|
217
|
+
### 示例 3:数据评估
|
|
218
|
+
|
|
219
|
+
```python
|
|
220
|
+
from myffe import FFE, evaluate_dataset
|
|
221
|
+
|
|
222
|
+
ffe = FFE()
|
|
223
|
+
|
|
224
|
+
# 评估数据集
|
|
225
|
+
metrics = evaluate_dataset(data, 'label')
|
|
226
|
+
print(metrics)
|
|
227
|
+
```
|
|
228
|
+
|
|
229
|
+
## 日志系统
|
|
230
|
+
|
|
231
|
+
myffe 内置日志系统,可以记录所有处理步骤:
|
|
232
|
+
|
|
233
|
+
```python
|
|
234
|
+
from myffe import logger, set_logging_enabled
|
|
235
|
+
|
|
236
|
+
# 启用日志
|
|
237
|
+
set_logging_enabled(True)
|
|
238
|
+
|
|
239
|
+
# 禁用日志
|
|
240
|
+
set_logging_enabled(False)
|
|
241
|
+
```
|
|
242
|
+
|
|
243
|
+
## 系统要求
|
|
244
|
+
|
|
245
|
+
- Python >= 3.7
|
|
246
|
+
- pandas >= 1.3.0
|
|
247
|
+
- numpy >= 1.20.0
|
|
248
|
+
- scikit-learn >= 0.24.0
|
|
249
|
+
- scipy >= 1.6.0
|
|
250
|
+
|
|
251
|
+
## 许可证
|
|
252
|
+
|
|
253
|
+
MIT License
|
|
254
|
+
|
|
255
|
+
## 贡献
|
|
256
|
+
|
|
257
|
+
欢迎贡献代码!请提交 Issue 或 Pull Request。
|
|
258
|
+
|
|
259
|
+
## 联系方式
|
|
260
|
+
|
|
261
|
+
- Email: your.email@example.com
|
|
262
|
+
- GitHub: https://github.com/yourusername/myffe
|
myffe-3.3.0/README.md
ADDED
|
@@ -0,0 +1,221 @@
|
|
|
1
|
+
# myffe - Feature Engineering Framework
|
|
2
|
+
|
|
3
|
+
myffe 是一个工业级的特征工程框架,提供完整的特征工程工具链,支持数据清洗、特征提取、样本处理等功能。
|
|
4
|
+
|
|
5
|
+
## 特性
|
|
6
|
+
|
|
7
|
+
- **模块化设计**:易于扩展和定制
|
|
8
|
+
- **多种预处理方法**:支持缺失值处理、异常值处理、标准化、归一化等
|
|
9
|
+
- **预定义路线**:提供 20+ 种预定义的特征工程路线
|
|
10
|
+
- **自动参数推荐**:智能推荐最佳预处理参数
|
|
11
|
+
- **工业级封装**:外部接口简洁易用
|
|
12
|
+
- **纯 Python 实现**:无需编译,跨平台兼容
|
|
13
|
+
|
|
14
|
+
## 安装
|
|
15
|
+
|
|
16
|
+
```bash
|
|
17
|
+
pip install myffe
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
## 快速开始
|
|
21
|
+
|
|
22
|
+
### 简单使用
|
|
23
|
+
|
|
24
|
+
```python
|
|
25
|
+
import pandas as pd
|
|
26
|
+
from myffe import process_data
|
|
27
|
+
|
|
28
|
+
# 读取数据
|
|
29
|
+
data = pd.read_csv('your_data.csv')
|
|
30
|
+
|
|
31
|
+
# 使用预定义路线 1(基础路线)处理数据
|
|
32
|
+
processed_data = process_data(data, data_label='label', route_number=1)
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
### 高级使用
|
|
36
|
+
|
|
37
|
+
```python
|
|
38
|
+
from myffe import FFE
|
|
39
|
+
|
|
40
|
+
# 创建 FFE 实例
|
|
41
|
+
ffe = FFE()
|
|
42
|
+
|
|
43
|
+
# 创建流水线
|
|
44
|
+
console, tools = ffe.create_pipeline('label', route_number=3)
|
|
45
|
+
|
|
46
|
+
# 处理数据
|
|
47
|
+
console(tools, data, save_path='processed_data.csv')
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
### 自动推荐路线
|
|
51
|
+
|
|
52
|
+
```python
|
|
53
|
+
from myffe import FFE
|
|
54
|
+
|
|
55
|
+
ffe = FFE()
|
|
56
|
+
|
|
57
|
+
# 自动检测数据并推荐路线
|
|
58
|
+
recommended_route = ffe.auto_detect_route(data)
|
|
59
|
+
|
|
60
|
+
# 使用推荐的路线处理数据
|
|
61
|
+
console, tools = ffe.create_pipeline('label', route=recommended_route)
|
|
62
|
+
console(tools, data, save_path='processed_data.csv')
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
## 预定义路线
|
|
66
|
+
|
|
67
|
+
myffe 提供 20+ 种预定义路线,适用于不同场景:
|
|
68
|
+
|
|
69
|
+
1. **基础路线** - 适合大多数数据集
|
|
70
|
+
2. **股票数据路线** - 专为股票数据优化
|
|
71
|
+
3. **文本数据路线** - 包含文本处理
|
|
72
|
+
4. **时间序列路线** - 强化时间特征提取
|
|
73
|
+
5. **完整路线** - 包含所有预处理步骤
|
|
74
|
+
6. **轻量级路线** - 快速处理小数据集
|
|
75
|
+
7. **异常检测路线** - 专注于异常值处理
|
|
76
|
+
8. **特征增强路线** - 强化特征工程
|
|
77
|
+
9. **分类数据路线** - 适合分类任务
|
|
78
|
+
10. **回归数据路线** - 适合回归任务
|
|
79
|
+
11. **中文文本路线** - 专为中文文本数据优化
|
|
80
|
+
12. **不平衡数据路线** - 处理类别不平衡的数据集
|
|
81
|
+
13. **高维数据路线** - 处理特征维度较高的数据集
|
|
82
|
+
14. **金融数据路线** - 专为金融数据优化
|
|
83
|
+
15. **医疗数据路线** - 适合医疗数据集的处理
|
|
84
|
+
16. **电商数据路线** - 专为电商数据优化
|
|
85
|
+
17. **社交媒体数据路线** - 处理社交媒体数据
|
|
86
|
+
18. **传感器数据路线** - 处理传感器数据
|
|
87
|
+
19. **多模态数据路线** - 处理包含多种类型数据的数据集
|
|
88
|
+
20. **实时数据路线** - 适合实时数据流处理
|
|
89
|
+
|
|
90
|
+
查看可用路线:
|
|
91
|
+
|
|
92
|
+
```python
|
|
93
|
+
from myffe import show_available_routes
|
|
94
|
+
show_available_routes()
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
## 核心模块
|
|
98
|
+
|
|
99
|
+
### 1. 数据预处理 (preprocessing)
|
|
100
|
+
|
|
101
|
+
- `nan_preprocessing` - 缺失值处理
|
|
102
|
+
- `outlier_preprocessing` - 异常值处理
|
|
103
|
+
- `str_preprocessing` - 字符串处理
|
|
104
|
+
- `time_preprocessing` - 时间特征提取
|
|
105
|
+
- `standard_preprocessing` - 数据标准化
|
|
106
|
+
- `normalized_preprocessing` - 数据归一化
|
|
107
|
+
- `sample_preprocessing` - 样本处理
|
|
108
|
+
- `relative_preprocessing` - 相对特征处理
|
|
109
|
+
- `specify_preprocessing` - 指定特征处理
|
|
110
|
+
|
|
111
|
+
### 2. 路线配置 (route)
|
|
112
|
+
|
|
113
|
+
- `predefined_routes` - 预定义路线
|
|
114
|
+
- `auto_route` - 自动路线推荐
|
|
115
|
+
- `route_config` - 路线配置
|
|
116
|
+
|
|
117
|
+
### 3. 工具 (tools)
|
|
118
|
+
|
|
119
|
+
- `data_description` - 数据描述
|
|
120
|
+
- `data_inspector` - 数据检查
|
|
121
|
+
- `visualize_route` - 路线可视化
|
|
122
|
+
- `logger` - 日志系统
|
|
123
|
+
- `color_printer` - 彩色打印
|
|
124
|
+
|
|
125
|
+
## 核心接口
|
|
126
|
+
|
|
127
|
+
- `FFE` - 特征工程框架主类
|
|
128
|
+
- `FFE_config` - 配置类
|
|
129
|
+
- `FFE_console` - 控制台类
|
|
130
|
+
- `process_data` - 数据处理函数
|
|
131
|
+
- `get_recommended_route` - 获取推荐路线
|
|
132
|
+
- `show_available_routes` - 显示可用路线
|
|
133
|
+
- `get_route_params` - 获取路线参数
|
|
134
|
+
- `describe_data` - 描述数据
|
|
135
|
+
- `evaluate_dataset` - 评估数据集
|
|
136
|
+
- `visualize_route` - 可视化路线
|
|
137
|
+
|
|
138
|
+
## 使用示例
|
|
139
|
+
|
|
140
|
+
### 示例 1:基础数据处理
|
|
141
|
+
|
|
142
|
+
```python
|
|
143
|
+
import pandas as pd
|
|
144
|
+
from myffe import process_data
|
|
145
|
+
|
|
146
|
+
# 创建示例数据
|
|
147
|
+
data = pd.DataFrame({
|
|
148
|
+
'feature1': [1.0, 2.0, None, 4.0, 5.0],
|
|
149
|
+
'feature2': ['A', 'B', 'A', None, 'C'],
|
|
150
|
+
'label': [0, 1, 0, 1, 0]
|
|
151
|
+
})
|
|
152
|
+
|
|
153
|
+
# 处理数据
|
|
154
|
+
processed = process_data(data, data_label='label', route_number=1)
|
|
155
|
+
print(processed.head())
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
### 示例 2:自定义路线
|
|
159
|
+
|
|
160
|
+
```python
|
|
161
|
+
from myffe import FFE, get_route_params
|
|
162
|
+
|
|
163
|
+
# 获取路线 3 的参数
|
|
164
|
+
params = get_route_params(3)
|
|
165
|
+
|
|
166
|
+
# 创建 FFE 实例
|
|
167
|
+
ffe = FFE()
|
|
168
|
+
|
|
169
|
+
# 创建流水线
|
|
170
|
+
console, tools = ffe.create_pipeline('label', route=params)
|
|
171
|
+
|
|
172
|
+
# 处理数据
|
|
173
|
+
console(tools, data, save_path='output.csv')
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
### 示例 3:数据评估
|
|
177
|
+
|
|
178
|
+
```python
|
|
179
|
+
from myffe import FFE, evaluate_dataset
|
|
180
|
+
|
|
181
|
+
ffe = FFE()
|
|
182
|
+
|
|
183
|
+
# 评估数据集
|
|
184
|
+
metrics = evaluate_dataset(data, 'label')
|
|
185
|
+
print(metrics)
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
## 日志系统
|
|
189
|
+
|
|
190
|
+
myffe 内置日志系统,可以记录所有处理步骤:
|
|
191
|
+
|
|
192
|
+
```python
|
|
193
|
+
from myffe import logger, set_logging_enabled
|
|
194
|
+
|
|
195
|
+
# 启用日志
|
|
196
|
+
set_logging_enabled(True)
|
|
197
|
+
|
|
198
|
+
# 禁用日志
|
|
199
|
+
set_logging_enabled(False)
|
|
200
|
+
```
|
|
201
|
+
|
|
202
|
+
## 系统要求
|
|
203
|
+
|
|
204
|
+
- Python >= 3.7
|
|
205
|
+
- pandas >= 1.3.0
|
|
206
|
+
- numpy >= 1.20.0
|
|
207
|
+
- scikit-learn >= 0.24.0
|
|
208
|
+
- scipy >= 1.6.0
|
|
209
|
+
|
|
210
|
+
## 许可证
|
|
211
|
+
|
|
212
|
+
MIT License
|
|
213
|
+
|
|
214
|
+
## 贡献
|
|
215
|
+
|
|
216
|
+
欢迎贡献代码!请提交 Issue 或 Pull Request。
|
|
217
|
+
|
|
218
|
+
## 联系方式
|
|
219
|
+
|
|
220
|
+
- Email: your.email@example.com
|
|
221
|
+
- GitHub: https://github.com/yourusername/myffe
|
myffe-3.3.0/myffe/FFE/__init__.py
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
# FFE 特征工程框架
|
|
2
|
+
# 包含myffe版本的所有核心功能
|
|
3
|
+
|
|
4
|
+
from .myffe import (
|
|
5
|
+
FFE_config,
|
|
6
|
+
FFE_console,
|
|
7
|
+
RouteConfig,
|
|
8
|
+
visualize_route,
|
|
9
|
+
describe_data,
|
|
10
|
+
evaluate_dataset,
|
|
11
|
+
print_evaluation_result,
|
|
12
|
+
get_route_params,
|
|
13
|
+
show_available_routes,
|
|
14
|
+
show_routes_visual,
|
|
15
|
+
process_data,
|
|
16
|
+
get_recommended_route
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
# Names re-exported from the implementation module imported above.
__all__ = [
    'FFE_config',
    'FFE_console',
    'RouteConfig',
    'visualize_route',
    'describe_data',
    'evaluate_dataset',
    'print_evaluation_result',
    'get_route_params',
    'show_available_routes',
    'show_routes_visual',
    'process_data',
    'get_recommended_route',
]

# Version information.
__version__ = '3.3.0'

# Convenience aliases: shorter names bound to the same objects.
FFE, Config = FFE_console, FFE_config

# Publish the aliases alongside the re-exported names.
__all__ += ['FFE', 'Config']
|
|
43
|
+
|
|
44
|
+
# 导出主要功能
|
|
45
|
+
def process(data, data_label, route=1, save_path='./processed_data.csv', **kwargs):
    """
    Convenience wrapper around ``process_data`` (simplified entry point).

    Args:
        data (DataFrame): Input data.
        data_label (str): Name of the label column.
        route (int): Predefined route number; defaults to 1 (the basic
            route). Forwarded to ``process_data`` as ``route_number``.
        save_path (str): Path forwarded to ``process_data`` as the location
            for the processed data.
        **kwargs: Extra keyword arguments forwarded unchanged.

    Returns:
        The value returned by ``process_data`` (described by the original
        author as the processed DataFrame).
    """
    result = process_data(
        data,
        data_label,
        route_number=route,
        save_path=save_path,
        **kwargs
    )
    return result
|
|
60
|
+
|
|
61
|
+
def pipeline(data_label, route=1, **kwargs):
    """
    Build a feature-engineering pipeline (simplified entry point).

    Args:
        data_label (str): Name of the label column.
        route (int): Predefined route number; defaults to 1 (the basic
            route). Forwarded to ``create_pipeline`` as ``route_number``.
        **kwargs: Extra keyword arguments forwarded unchanged.

    Returns:
        The value returned by ``create_pipeline`` (described by the original
        author as a ``(console, clean_tools)`` tuple: the console instance
        and the dictionary of preprocessing tools).
    """
    # Resolved at call time, not at module import time.
    from .myffe import FFE as _framework_cls

    return _framework_cls().create_pipeline(data_label, route_number=route, **kwargs)
|
|
76
|
+
|
|
77
|
+
# 添加到__all__
|
|
78
|
+
__all__ += ['process', 'pipeline']
|
|
79
|
+
|
|
80
|
+
# 显示可用路线的便捷函数
|
|
81
|
+
def routes():
    """
    Show all available predefined routes.

    Delegates to ``show_available_routes`` and discards its return value.
    """
    show_available_routes()
    return None
|
|
86
|
+
|
|
87
|
+
# 添加到__all__
|
|
88
|
+
__all__ += ['routes']
|