mlgear 0.4__py3-none-any.whl → 0.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mlgear/cv.py +4 -1
- mlgear/models.py +32 -13
- mlgear/utils.py +5 -1
- {mlgear-0.4.dist-info → mlgear-0.5.dist-info}/METADATA +36 -7
- mlgear-0.5.dist-info/RECORD +13 -0
- {mlgear-0.4.dist-info → mlgear-0.5.dist-info}/WHEEL +1 -2
- mlgear-0.4.dist-info/RECORD +0 -14
- mlgear-0.4.dist-info/top_level.txt +0 -1
- {mlgear-0.4.dist-info → mlgear-0.5.dist-info}/LICENSE.txt +0 -0
mlgear/cv.py
CHANGED
@@ -51,7 +51,10 @@ def run_cv_model(train, test=None, target=None, model_fn=None, params={}, eval_f
|
|
51
51
|
models[i] = model
|
52
52
|
if importances is not None and isinstance(train, pd.DataFrame):
|
53
53
|
fold_importance_df = pd.DataFrame()
|
54
|
-
|
54
|
+
if params.get('group') is None:
|
55
|
+
fold_importance_df['feature'] = train.columns.values
|
56
|
+
else:
|
57
|
+
fold_importance_df['feature'] = [c for c in train.columns.values if c != params['group']]
|
55
58
|
fold_importance_df['importance'] = importances
|
56
59
|
fold_importance_df['fold'] = i
|
57
60
|
feature_importance_df = pd.concat([feature_importance_df, fold_importance_df], axis=0)
|
mlgear/models.py
CHANGED
@@ -11,16 +11,13 @@ from mlgear.utils import print_step
|
|
11
11
|
def runLGB(train_X, train_y, test_X=None, test_y=None, test_X2=None, params={}, meta=None, verbose=True):
|
12
12
|
if verbose:
|
13
13
|
print_step('Prep LGB')
|
14
|
-
|
15
|
-
if
|
16
|
-
|
17
|
-
watchlist = [d_train, d_valid]
|
14
|
+
|
15
|
+
if params.get('group'):
|
16
|
+
group = params.pop('group')
|
18
17
|
else:
|
19
|
-
|
20
|
-
|
21
|
-
print_step('Train LGB')
|
18
|
+
group = None
|
19
|
+
|
22
20
|
num_rounds = params.pop('num_rounds')
|
23
|
-
verbose_eval = params.pop('verbose_eval')
|
24
21
|
early_stop = None
|
25
22
|
if params.get('early_stop'):
|
26
23
|
early_stop = params.pop('early_stop')
|
@@ -37,6 +34,31 @@ def runLGB(train_X, train_y, test_X=None, test_y=None, test_X2=None, params={},
|
|
37
34
|
else:
|
38
35
|
feval = None
|
39
36
|
|
37
|
+
if group is None:
|
38
|
+
d_train = lgb.Dataset(train_X, label=train_y)
|
39
|
+
else:
|
40
|
+
d_train = lgb.Dataset(train_X.drop(group, axis=1),
|
41
|
+
label=train_y,
|
42
|
+
group=train_X.groupby(group).size().to_numpy())
|
43
|
+
|
44
|
+
if test_X is not None:
|
45
|
+
if group is None:
|
46
|
+
d_valid = lgb.Dataset(test_X, label=test_y)
|
47
|
+
else:
|
48
|
+
d_valid = lgb.Dataset(test_X.drop(group, axis=1),
|
49
|
+
label=test_y,
|
50
|
+
group=test_X.groupby(group).size().to_numpy())
|
51
|
+
test_X = test_X.drop(group, axis=1)
|
52
|
+
watchlist = [d_train, d_valid]
|
53
|
+
else:
|
54
|
+
watchlist = [d_train]
|
55
|
+
|
56
|
+
if test_X2 is not None and group is not None:
|
57
|
+
test_X2 = test_X2.drop(group, axis=1)
|
58
|
+
|
59
|
+
if verbose:
|
60
|
+
print_step('Train LGB')
|
61
|
+
|
40
62
|
preds_test_y = []
|
41
63
|
preds_test_y2 = []
|
42
64
|
for b in range(nbag):
|
@@ -45,9 +67,7 @@ def runLGB(train_X, train_y, test_X=None, test_y=None, test_X2=None, params={},
|
|
45
67
|
train_set=d_train,
|
46
68
|
num_boost_round=num_rounds,
|
47
69
|
valid_sets=watchlist,
|
48
|
-
|
49
|
-
early_stopping_rounds=early_stop,
|
50
|
-
categorical_feature=cat_cols,
|
70
|
+
callbacks=[lgb.early_stopping(stopping_rounds=early_stop)] if early_stop else [],
|
51
71
|
feval=feval)
|
52
72
|
if test_X is not None:
|
53
73
|
if verbose:
|
@@ -75,8 +95,7 @@ def get_lgb_feature_importance(train, target, params):
|
|
75
95
|
train_d = lgb.Dataset(train, label=target)
|
76
96
|
lgb_params2 = params.copy()
|
77
97
|
rounds = lgb_params2.pop('num_rounds', 400)
|
78
|
-
|
79
|
-
model = lgb.train(lgb_params2, train_d, rounds, valid_sets = [train_d], verbose_eval=verbose_eval)
|
98
|
+
model = lgb.train(lgb_params2, train_d, rounds, valid_sets = [train_d])
|
80
99
|
feature_df = pd.DataFrame(sorted(zip(model.feature_importance(), train.columns)),
|
81
100
|
columns=['Value', 'Feature']).sort_values('Value', ascending=False)
|
82
101
|
return feature_df
|
mlgear/utils.py
CHANGED
@@ -14,7 +14,7 @@ def show(df, max_rows=10, max_cols=None, digits=6):
|
|
14
14
|
|
15
15
|
|
16
16
|
def display_column(df, var):
|
17
|
-
if df[var].nunique() > 9 and (df[var].dtype == int or df[var].dtype == float):
|
17
|
+
if df[var].astype(str).nunique() > 9 and (df[var].dtype == int or df[var].dtype == float):
|
18
18
|
print('Mean: {} Median: {} SD: {}'.format(df[var].mean(), df[var].median(), df[var].std()))
|
19
19
|
else:
|
20
20
|
print(df[var].value_counts(normalize=True) * 100)
|
@@ -36,3 +36,7 @@ def chunk(l, n):
|
|
36
36
|
for i in range(0, len(l), n):
|
37
37
|
out.append(l[i:i + n])
|
38
38
|
return out
|
39
|
+
|
40
|
+
|
41
|
+
def min_max(dat):
|
42
|
+
return (min(dat), max(dat))
|
@@ -1,16 +1,28 @@
|
|
1
|
-
Metadata-Version: 2.
|
1
|
+
Metadata-Version: 2.3
|
2
2
|
Name: mlgear
|
3
|
-
Version: 0.4
|
3
|
+
Version: 0.5
|
4
4
|
Summary: Utility scripts for machine learning
|
5
|
-
|
5
|
+
License: MIT
|
6
6
|
Author: Peter Hurford
|
7
7
|
Author-email: peter@peterhurford.com
|
8
|
-
|
9
|
-
Platform: UNKNOWN
|
8
|
+
Requires-Python: >=3.7,<4.0
|
10
9
|
Classifier: Development Status :: 3 - Alpha
|
11
|
-
Classifier: Programming Language :: Python :: 3
|
12
10
|
Classifier: License :: OSI Approved :: MIT License
|
13
11
|
Classifier: Operating System :: OS Independent
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
13
|
+
Classifier: Programming Language :: Python :: 3.7
|
14
|
+
Classifier: Programming Language :: Python :: 3.8
|
15
|
+
Classifier: Programming Language :: Python :: 3.9
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
19
|
+
Classifier: Programming Language :: Python :: 3.13
|
20
|
+
Requires-Dist: keras
|
21
|
+
Requires-Dist: lightgbm
|
22
|
+
Requires-Dist: numpy
|
23
|
+
Requires-Dist: pandas
|
24
|
+
Requires-Dist: scikit-learn
|
25
|
+
Project-URL: Repository, https://github.com/peterhurford/mlgear
|
14
26
|
Description-Content-Type: text/markdown
|
15
27
|
|
16
28
|
## MLGear
|
@@ -45,6 +57,23 @@ results = run_cv_model(train, test, target, runLGB, lgb_params, rmse)
|
|
45
57
|
|
46
58
|
### Installation
|
47
59
|
|
48
|
-
|
60
|
+
```
|
61
|
+
pip install mlgear
|
62
|
+
```
|
63
|
+
|
64
|
+
For development:
|
49
65
|
|
66
|
+
```
|
67
|
+
# Install poetry if you don't have it
|
68
|
+
pip install poetry
|
69
|
+
|
70
|
+
# Install dependencies
|
71
|
+
poetry install
|
72
|
+
|
73
|
+
# Build the package
|
74
|
+
poetry build
|
75
|
+
|
76
|
+
# Publish to PyPI
|
77
|
+
poetry publish
|
78
|
+
```
|
50
79
|
|
@@ -0,0 +1,13 @@
|
|
1
|
+
mlgear/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
2
|
+
mlgear/aggregators.py,sha256=k_GGL8zuBqpBKPYR_v14SqPcJyAUalHcQN2o7gjApiw,373
|
3
|
+
mlgear/cv.py,sha256=Dk1ajLFITWgu5nQP-R1rpT-KQ9oYaQRhgSYqQi0IwX0,4009
|
4
|
+
mlgear/encoders.py,sha256=_NRqTNSdCNhT04Odxa9xRQq7nrr9bTFRwskARx1rgnU,5044
|
5
|
+
mlgear/lr_scheduler.py,sha256=S7DlTAWTzAUAQbmzm-yWIWI5r49Htz1jRBQ98IQHdgg,5272
|
6
|
+
mlgear/metrics.py,sha256=_zQwjz4X3-vUQEqu2yIfq2w2XnuH8YUSD_M_u6szToo,1188
|
7
|
+
mlgear/models.py,sha256=RtvmsjMFoecdce_ikj3fa9cukGdvQEj3Y72hz5Qw-fY,6249
|
8
|
+
mlgear/tracker.py,sha256=U2OXm9tjAWSc5B5_-oTvj_YAJdpkU4nsmPE7tH8BSN4,447
|
9
|
+
mlgear/utils.py,sha256=I72-qBgiisV1hcoUT5almb8GXwfmhTQgwvP6gl8kJEY,1096
|
10
|
+
mlgear-0.5.dist-info/LICENSE.txt,sha256=qkKmWAzXQC3lYVyoucB3x4iW2xnGEmaORCB4ADTAik4,1081
|
11
|
+
mlgear-0.5.dist-info/METADATA,sha256=sRhbP8yNadrRqDX3mY1wOKpjbWD2DPnLbwmibEFYvYc,1994
|
12
|
+
mlgear-0.5.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
|
13
|
+
mlgear-0.5.dist-info/RECORD,,
|
mlgear-0.4.dist-info/RECORD
DELETED
@@ -1,14 +0,0 @@
|
|
1
|
-
mlgear/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
2
|
-
mlgear/aggregators.py,sha256=k_GGL8zuBqpBKPYR_v14SqPcJyAUalHcQN2o7gjApiw,373
|
3
|
-
mlgear/cv.py,sha256=6gHFOVHcisu6qs0mYUcHKmCvP4ygWBUnLmUOWBtBQ08,3837
|
4
|
-
mlgear/encoders.py,sha256=_NRqTNSdCNhT04Odxa9xRQq7nrr9bTFRwskARx1rgnU,5044
|
5
|
-
mlgear/lr_scheduler.py,sha256=S7DlTAWTzAUAQbmzm-yWIWI5r49Htz1jRBQ98IQHdgg,5272
|
6
|
-
mlgear/metrics.py,sha256=_zQwjz4X3-vUQEqu2yIfq2w2XnuH8YUSD_M_u6szToo,1188
|
7
|
-
mlgear/models.py,sha256=6X2VA7On6ioPFHDbB9YTbO3pXlI71GGJb-PhVK8nddY,5740
|
8
|
-
mlgear/tracker.py,sha256=U2OXm9tjAWSc5B5_-oTvj_YAJdpkU4nsmPE7tH8BSN4,447
|
9
|
-
mlgear/utils.py,sha256=E8lb0gsTf4tun7PHUQ5GFxwwxY3ZRxwIMzNCTZCb1rM,1032
|
10
|
-
mlgear-0.4.dist-info/LICENSE.txt,sha256=qkKmWAzXQC3lYVyoucB3x4iW2xnGEmaORCB4ADTAik4,1081
|
11
|
-
mlgear-0.4.dist-info/METADATA,sha256=mQSNZAZRZBNO1IU7EiifAN0w4u9ECVVkvKgl2TejG8o,1309
|
12
|
-
mlgear-0.4.dist-info/WHEEL,sha256=EVRjI69F5qVjm_YgqcTXPnTAv3BfSUr0WVAHuSP3Xoo,92
|
13
|
-
mlgear-0.4.dist-info/top_level.txt,sha256=TM51_lbw1nIKS5TvY-qVQEBGw1tMrgBKGUcB5BISu-Y,7
|
14
|
-
mlgear-0.4.dist-info/RECORD,,
|
@@ -1 +0,0 @@
|
|
1
|
-
mlgear
|
File without changes
|