mlgear 0.3__tar.gz → 0.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mlgear-0.5/LICENSE.txt ADDED
@@ -0,0 +1,20 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2020- Peter Hurford
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy of
6
+ this software and associated documentation files (the "Software"), to deal in
7
+ the Software without restriction, including without limitation the rights to
8
+ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
9
+ the Software, and to permit persons to whom the Software is furnished to do so,
10
+ subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
17
+ FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
18
+ COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
19
+ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
mlgear-0.5/PKG-INFO ADDED
@@ -0,0 +1,79 @@
1
+ Metadata-Version: 2.3
2
+ Name: mlgear
3
+ Version: 0.5
4
+ Summary: Utility scripts for machine learning
5
+ License: MIT
6
+ Author: Peter Hurford
7
+ Author-email: peter@peterhurford.com
8
+ Requires-Python: >=3.7,<4.0
9
+ Classifier: Development Status :: 3 - Alpha
10
+ Classifier: License :: OSI Approved :: MIT License
11
+ Classifier: Operating System :: OS Independent
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3.7
14
+ Classifier: Programming Language :: Python :: 3.8
15
+ Classifier: Programming Language :: Python :: 3.9
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Programming Language :: Python :: 3.13
20
+ Requires-Dist: keras
21
+ Requires-Dist: lightgbm
22
+ Requires-Dist: numpy
23
+ Requires-Dist: pandas
24
+ Requires-Dist: scikit-learn
25
+ Project-URL: Repository, https://github.com/peterhurford/mlgear
26
+ Description-Content-Type: text/markdown
27
+
28
+ ## MLGear
29
+
30
+ Some utility functions to make ML with Python / Pandas / sklearn even easier
31
+
32
+ ### Example Usage
33
+
34
+ ```Python
35
+ from mlgear.cv import run_cv_model
36
+ from mlgear.models import runLGB
37
+ from mlgear.metrics import rmse
38
+
39
+ lgb_params = {'application': 'regression',
40
+ 'boosting': 'gbdt',
41
+ 'metric': 'rmse',
42
+ 'num_leaves': 15,
43
+ 'learning_rate': 0.01,
44
+ 'bagging_fraction': 0.9,
45
+ 'feature_fraction': 0.9,
46
+ 'verbosity': -1,
47
+ 'seed': 1,
48
+ 'lambda_l1': 1,
49
+ 'lambda_l2': 1,
50
+ 'early_stop': 20,
51
+ 'verbose_eval': 10,
52
+ 'num_rounds': 500,
53
+ 'num_threads': 3}
54
+
55
+ results = run_cv_model(train, test, target, runLGB, lgb_params, rmse)
56
+ ```
57
+
58
+ ### Installation
59
+
60
+ ```
61
+ pip install mlgear
62
+ ```
63
+
64
+ For development:
65
+
66
+ ```
67
+ # Install poetry if you don't have it
68
+ pip install poetry
69
+
70
+ # Install dependencies
71
+ poetry install
72
+
73
+ # Build the package
74
+ poetry build
75
+
76
+ # Publish to PyPI
77
+ poetry publish
78
+ ```
79
+
@@ -2,7 +2,7 @@
2
2
 
3
3
  Some utility functions to make ML with Python / Pandas / sklearn even easier
4
4
 
5
- #### Example Usage
5
+ ### Example Usage
6
6
 
7
7
  ```Python
8
8
  from mlgear.cv import run_cv_model
@@ -27,3 +27,25 @@ lgb_params = {'application': 'regression',
27
27
 
28
28
  results = run_cv_model(train, test, target, runLGB, lgb_params, rmse)
29
29
  ```
30
+
31
+ ### Installation
32
+
33
+ ```
34
+ pip install mlgear
35
+ ```
36
+
37
+ For development:
38
+
39
+ ```
40
+ # Install poetry if you don't have it
41
+ pip install poetry
42
+
43
+ # Install dependencies
44
+ poetry install
45
+
46
+ # Build the package
47
+ poetry build
48
+
49
+ # Publish to PyPI
50
+ poetry publish
51
+ ```
@@ -0,0 +1,97 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+
4
+ from sklearn.model_selection import KFold
5
+
6
+ from mlgear.utils import print_step
7
+
8
+
9
def _take_rows(data, index):
    """Select rows of ``data`` by integer position (pandas objects via ``.iloc``)."""
    if isinstance(data, (pd.DataFrame, pd.Series)):
        return data.iloc[index]
    return data[index]


def run_cv_model(train, test=None, target=None, model_fn=None, params={}, eval_fn=None, label='model', n_folds=5, fold_splits=None, classes=1, stop_on_fold=None, train_on_full=False, verbose=True):
    """Run ``model_fn`` over cross-validation folds and collect its outputs.

    Args:
        train: Training features; a pandas DataFrame or anything indexable by
            an integer index array that exposes ``.shape``.
        test: Optional hold-out features handed to ``model_fn`` on every fold.
        target: Training labels, row-aligned with ``train``. Required.
        model_fn: Callable invoked as ``model_fn(dev_X, dev_y, val_X, val_y,
            test, params, meta, verbose=verbose)``; must return
            ``(val_predictions, test_predictions, importances, model)``.
            Required.
        params: Parameter dict; a shallow copy is passed to each ``model_fn``
            call so the caller's dict is never mutated here. If it has a
            ``'group'`` entry, that column is left out of the feature
            importance table.
        eval_fn: Optional ``eval_fn(y_true, y_pred)`` scoring function.
        label: Name used in progress messages and in the result dict.
        n_folds: Number of folds for the default shuffled ``KFold``.
        fold_splits: Optional iterable of ``(dev_index, val_index)`` pairs
            (a one-shot generator is fine). When given and non-empty it is
            used instead of ``KFold`` and its length, not ``n_folds``, is the
            number of folds.
        classes: Number of prediction columns; values above 1 allocate 2-D
            prediction buffers.
        stop_on_fold: If set, return right after this (1-based) fold.
        train_on_full: If true, refit on all of ``train`` after the folds and
            use that fit's test predictions instead of the fold average.
        verbose: Print progress through ``print_step`` and forward the flag
            to ``model_fn``.

    Returns:
        dict with keys ``'label'``, ``'train'`` (out-of-fold predictions),
        ``'cv'`` (per-fold scores), ``'importance'`` (DataFrame), ``'model'``
        (models keyed by fold number, plus ``'full'`` when refit) and, when
        ``test`` is given, ``'test'``. A complete run also has ``'final_cv'``;
        an early ``stop_on_fold`` return does not.

    Raises:
        ValueError: If ``target`` or ``model_fn`` is missing.
    """
    if target is None:
        raise ValueError('Target is needed.')
    if model_fn is None:
        raise ValueError('model function is needed.')

    # Materialise the splits so a generator can be counted; the real number
    # of folds is what the test predictions must be averaged over.
    if fold_splits is not None:
        fold_splits = list(fold_splits)
    if not fold_splits:
        kf = KFold(n_splits=n_folds, random_state=42, shuffle=True)
        fold_splits = list(kf.split(train))
    total_folds = len(fold_splits)

    cv_scores = []
    models = {}
    if classes > 1 and test is not None:
        pred_full_test = np.zeros((test.shape[0], classes))
    else:
        pred_full_test = 0
    if classes > 1:
        pred_train = np.zeros((train.shape[0], classes))
    else:
        pred_train = np.zeros(train.shape[0])
    feature_importance_df = pd.DataFrame()
    train_is_frame = isinstance(train, pd.DataFrame)

    for i, (dev_index, val_index) in enumerate(fold_splits, start=1):
        if verbose:
            print_step('Started ' + label + ' fold ' + str(i) + '/' + str(total_folds))
        dev_X, val_X = _take_rows(train, dev_index), _take_rows(train, val_index)
        # Positional selection: label-based lookup on a Series with a
        # non-default index would pick the wrong rows or raise KeyError.
        dev_y, val_y = _take_rows(target, dev_index), _take_rows(target, val_index)
        params2 = params.copy()
        meta = {'dev_index': dev_index,
                'val_index': val_index,
                'fold': i,
                'label': label}
        pred_val_y, pred_test_y, importances, model = model_fn(dev_X, dev_y, val_X, val_y, test, params2, meta, verbose=verbose)
        if test is not None:
            pred_full_test = pred_full_test + pred_test_y
        pred_train[val_index] = pred_val_y
        if eval_fn is not None:
            cv_score = eval_fn(val_y, pred_val_y)
            cv_scores.append(cv_score)
            if verbose:
                print_step(label + ' cv score {}: {}'.format(i, cv_score))
        models[i] = model
        if importances is not None and train_is_frame:
            fold_importance_df = pd.DataFrame()
            if params.get('group') is None:
                fold_importance_df['feature'] = train.columns.values
            else:
                fold_importance_df['feature'] = [c for c in train.columns.values if c != params['group']]
            fold_importance_df['importance'] = importances
            fold_importance_df['fold'] = i
            feature_importance_df = pd.concat([feature_importance_df, fold_importance_df], axis=0)
        if stop_on_fold and stop_on_fold == i:
            results = {'label': label,
                       'train': pred_train,
                       'cv': cv_scores,
                       'importance': feature_importance_df,
                       'model': models}
            if test is not None:
                # Average over the folds that actually ran, not a raw sum.
                results['test'] = pred_full_test / i
            return results

    if train_on_full:
        if verbose:
            print_step('## Training on full ##')
        params2 = params.copy()
        # No fold metadata exists for the full fit; pass None explicitly so
        # the call has the same shape as the per-fold call.
        _, pred_full_test, importances, model = model_fn(train, target, None, None, test, params2, None, verbose=verbose)
        models['full'] = model
    elif test is not None:
        pred_full_test = pred_full_test / total_folds

    final_cv = eval_fn(target, pred_train) if eval_fn else None

    if verbose:
        # Without eval_fn there are no scores; skip the nan-producing
        # mean/std of an empty list.
        mean_score = np.mean(cv_scores) if cv_scores else None
        std_score = np.std(cv_scores) if cv_scores else None
        print_step('{} cv scores : {}'.format(label, cv_scores))
        print_step('{} cv mean score : {}'.format(label, mean_score))
        print_step('{} cv total score : {}'.format(label, final_cv))
        print_step('{} cv std score : {}'.format(label, std_score))

    results = {'label': label,
               'train': pred_train,
               'cv': cv_scores,
               'final_cv': final_cv,
               'importance': feature_importance_df,
               'model': models}
    if test is not None:
        results['test'] = pred_full_test
    return results
@@ -8,17 +8,16 @@ from sklearn.preprocessing import StandardScaler
8
8
  from mlgear.utils import print_step
9
9
 
10
10
 
11
- def runLGB(train_X, train_y, test_X=None, test_y=None, test_X2=None, params={}, meta=None):
12
- print('Prep LGB')
13
- d_train = lgb.Dataset(train_X, label=train_y)
14
- if test_X is not None:
15
- d_valid = lgb.Dataset(test_X, label=test_y)
16
- watchlist = [d_train, d_valid]
11
+ def runLGB(train_X, train_y, test_X=None, test_y=None, test_X2=None, params={}, meta=None, verbose=True):
12
+ if verbose:
13
+ print_step('Prep LGB')
14
+
15
+ if params.get('group'):
16
+ group = params.pop('group')
17
17
  else:
18
- watchlist = [d_train]
19
- print('Train LGB')
18
+ group = None
19
+
20
20
  num_rounds = params.pop('num_rounds')
21
- verbose_eval = params.pop('verbose_eval')
22
21
  early_stop = None
23
22
  if params.get('early_stop'):
24
23
  early_stop = params.pop('early_stop')
@@ -35,6 +34,31 @@ def runLGB(train_X, train_y, test_X=None, test_y=None, test_X2=None, params={},
35
34
  else:
36
35
  feval = None
37
36
 
37
+ if group is None:
38
+ d_train = lgb.Dataset(train_X, label=train_y)
39
+ else:
40
+ d_train = lgb.Dataset(train_X.drop(group, axis=1),
41
+ label=train_y,
42
+ group=train_X.groupby(group).size().to_numpy())
43
+
44
+ if test_X is not None:
45
+ if group is None:
46
+ d_valid = lgb.Dataset(test_X, label=test_y)
47
+ else:
48
+ d_valid = lgb.Dataset(test_X.drop(group, axis=1),
49
+ label=test_y,
50
+ group=test_X.groupby(group).size().to_numpy())
51
+ test_X = test_X.drop(group, axis=1)
52
+ watchlist = [d_train, d_valid]
53
+ else:
54
+ watchlist = [d_train]
55
+
56
+ if test_X2 is not None and group is not None:
57
+ test_X2 = test_X2.drop(group, axis=1)
58
+
59
+ if verbose:
60
+ print_step('Train LGB')
61
+
38
62
  preds_test_y = []
39
63
  preds_test_y2 = []
40
64
  for b in range(nbag):
@@ -43,16 +67,16 @@ def runLGB(train_X, train_y, test_X=None, test_y=None, test_X2=None, params={},
43
67
  train_set=d_train,
44
68
  num_boost_round=num_rounds,
45
69
  valid_sets=watchlist,
46
- verbose_eval=verbose_eval,
47
- early_stopping_rounds=early_stop,
48
- categorical_feature=cat_cols,
70
+ callbacks=[lgb.early_stopping(stopping_rounds=early_stop)] if early_stop else [],
49
71
  feval=feval)
50
72
  if test_X is not None:
51
- print('Predict 1/2')
73
+ if verbose:
74
+ print_step('Predict 1/2')
52
75
  pred_test_y = model.predict(test_X, num_iteration=model.best_iteration)
53
76
  preds_test_y += [pred_test_y]
54
77
  if test_X2 is not None:
55
- print('Predict 2/2')
78
+ if verbose:
79
+ print_step('Predict 2/2')
56
80
  pred_test_y2 = model.predict(test_X2, num_iteration=model.best_iteration)
57
81
  preds_test_y2 += [pred_test_y2]
58
82
 
@@ -71,21 +95,22 @@ def get_lgb_feature_importance(train, target, params):
71
95
  train_d = lgb.Dataset(train, label=target)
72
96
  lgb_params2 = params.copy()
73
97
  rounds = lgb_params2.pop('num_rounds', 400)
74
- verbose_eval = lgb_params2.pop('verbose_eval', 100)
75
- model = lgb.train(lgb_params2, train_d, rounds, valid_sets = [train_d], verbose_eval=verbose_eval)
98
+ model = lgb.train(lgb_params2, train_d, rounds, valid_sets = [train_d])
76
99
  feature_df = pd.DataFrame(sorted(zip(model.feature_importance(), train.columns)),
77
100
  columns=['Value', 'Feature']).sort_values('Value', ascending=False)
78
101
  return feature_df
79
102
 
80
103
 
81
- def runMLP(train_X, train_y, test_X=None, test_y=None, test_X2=None, params={}, meta=None):
82
- print('Define Model')
104
+ def runMLP(train_X, train_y, test_X=None, test_y=None, test_X2=None, params={}, meta=None, verbose=True):
105
+ if verbose:
106
+ print_step('Define Model')
83
107
  model = params['model'](params['input_size'])
84
108
  es = params['early_stopper']()
85
109
  es.set_model(model)
86
110
  metric = params['metric']
87
111
  metric = metric(model, [es], [(train_X, train_y), (test_X, test_y)])
88
- print('Fit MLP')
112
+ if verbose:
113
+ print_step('Fit MLP')
89
114
  model.fit(train_X, train_y,
90
115
  verbose=params.get('model_verbose', 0),
91
116
  callbacks=[metric] + params['lr_scheduler'](),
@@ -93,12 +118,14 @@ def runMLP(train_X, train_y, test_X=None, test_y=None, test_X2=None, params={},
93
118
  validation_data=(test_X, test_y),
94
119
  batch_size=params.get('batch_size', 128))
95
120
  if test_X is not None:
96
- print('MLP Predict 1/2')
121
+ if verbose:
122
+ print_step('MLP Predict 1/2')
97
123
  pred_test_y = model.predict(test_X)
98
124
  else:
99
125
  pred_test_y = None
100
126
  if test_X2 is not None:
101
- print('MLP Predict 2/2')
127
+ if verbose:
128
+ print_step('MLP Predict 2/2')
102
129
  pred_test_y2 = model.predict(test_X2)
103
130
  else:
104
131
  pred_test_y2 = None
@@ -106,10 +133,11 @@ def runMLP(train_X, train_y, test_X=None, test_y=None, test_X2=None, params={},
106
133
  return pred_test_y, pred_test_y2, None, model
107
134
 
108
135
 
109
- def runLR(train_X, train_y, test_X=None, test_y=None, test_X2=None, params={}, meta=None):
136
+ def runLR(train_X, train_y, test_X=None, test_y=None, test_X2=None, params={}, meta=None, verbose=True):
110
137
  params['random_state'] = 42
111
138
  if params.get('scale'):
112
- print_step('Scale')
139
+ if verbose:
140
+ print_step('Scale')
113
141
  params.pop('scale')
114
142
  scaler = StandardScaler()
115
143
  scaler.fit(train_X.values)
@@ -119,33 +147,39 @@ def runLR(train_X, train_y, test_X=None, test_y=None, test_X2=None, params={}, m
119
147
  if test_X2 is not None:
120
148
  test_X2 = scaler.transform(test_X2.values)
121
149
 
122
- print_step('Train LR')
150
+ if verbose:
151
+ print_step('Train LR')
123
152
  model = LogisticRegression(**params)
124
153
  model.fit(train_X, train_y)
125
154
  if test_X is not None:
126
- print_step('Predict 1/2')
155
+ if verbose:
156
+ print_step('Predict 1/2')
127
157
  pred_test_y = model.predict_proba(test_X)[:, 1]
128
158
  else:
129
159
  pred_test_y = None
130
160
  if test_X2 is not None:
131
- print_step('Predict 2/2')
161
+ if verbose:
162
+ print_step('Predict 2/2')
132
163
  pred_test_y2 = model.predict_proba(test_X2)[:, 1]
133
164
  else:
134
165
  pred_test_y2 = None
135
166
  return pred_test_y, pred_test_y2, model.coef_, model
136
167
 
137
168
 
138
- def runRidge(train_X, train_y, test_X=None, test_y=None, test_X2=None, params={}, meta=None):
169
+ def runRidge(train_X, train_y, test_X=None, test_y=None, test_X2=None, params={}, meta=None, verbose=True):
139
170
  model = Ridge(**params)
140
- print_step('Fit Ridge')
171
+ if verbose:
172
+ print_step('Fit Ridge')
141
173
  model.fit(train_X, train_y)
142
174
  if test_X is not None:
143
- print_step('Ridge Predict 1/2')
175
+ if verbose:
176
+ print_step('Ridge Predict 1/2')
144
177
  pred_test_y = model.predict(test_X)
145
178
  else:
146
179
  pred_test_y = None
147
180
  if test_X2 is not None:
148
- print_step('Ridge Predict 2/2')
181
+ if verbose:
182
+ print_step('Ridge Predict 2/2')
149
183
  pred_test_y2 = model.predict(test_X2)
150
184
  else:
151
185
  pred_test_y2 = None
@@ -14,7 +14,7 @@ def show(df, max_rows=10, max_cols=None, digits=6):
14
14
 
15
15
 
16
16
  def display_column(df, var):
17
- if df[var].nunique() > 9 and (df[var].dtype == int or df[var].dtype == float):
17
+ if df[var].astype(str).nunique() > 9 and (df[var].dtype == int or df[var].dtype == float):
18
18
  print('Mean: {} Median: {} SD: {}'.format(df[var].mean(), df[var].median(), df[var].std()))
19
19
  else:
20
20
  print(df[var].value_counts(normalize=True) * 100)
@@ -36,3 +36,7 @@ def chunk(l, n):
36
36
  for i in range(0, len(l), n):
37
37
  out.append(l[i:i + n])
38
38
  return out
39
+
40
+
41
def min_max(dat):
    """Return ``(minimum, maximum)`` of the values in ``dat``.

    ``dat`` may be any iterable of mutually comparable values. One-shot
    iterators (e.g. generators) are buffered first, because ``min`` would
    otherwise exhaust them before ``max`` runs.

    Raises:
        ValueError: If ``dat`` is empty (raised by ``min``).
    """
    # An iterator returns itself from iter(); re-iterable containers do not.
    if iter(dat) is dat:
        dat = list(dat)
    return (min(dat), max(dat))
@@ -0,0 +1,29 @@
1
+ [tool.poetry]
2
+ name = "mlgear"
3
+ version = "0.5"
4
+ description = "Utility scripts for machine learning"
5
+ authors = ["Peter Hurford <peter@peterhurford.com>"]
6
+ license = "MIT"
7
+ readme = "README.md"
8
+ repository = "https://github.com/peterhurford/mlgear"
9
+ classifiers = [
10
+ "Development Status :: 3 - Alpha",
11
+ "Programming Language :: Python :: 3",
12
+ "License :: OSI Approved :: MIT License",
13
+ "Operating System :: OS Independent",
14
+ ]
15
+
16
+ [tool.poetry.dependencies]
17
+ python = "^3.7"
18
+ keras = "*"
19
+ lightgbm = "*"
20
+ numpy = "*"
21
+ pandas = "*"
22
+ scikit-learn = "*"
23
+
24
+ [tool.poetry.group.dev.dependencies]
25
+ flake8 = "*"
26
+
27
+ [build-system]
28
+ requires = ["poetry-core>=1.0.0"]
29
+ build-backend = "poetry.core.masonry.api"
mlgear-0.3/PKG-INFO DELETED
@@ -1,44 +0,0 @@
1
- Metadata-Version: 2.1
2
- Name: mlgear
3
- Version: 0.3
4
- Summary: Utility scripts for machine learning
5
- Home-page: https://github.com/peterhurford/mlgear
6
- Author: Peter Hurford
7
- Author-email: peter@peterhurford.com
8
- License: UNKNOWN
9
- Description: ## MLGear
10
-
11
- Some utility functions to make ML with Python / Pandas / sklearn even easier
12
-
13
- #### Example Usage
14
-
15
- ```Python
16
- from mlgear.cv import run_cv_model
17
- from mlgear.models import runLGB
18
- from mlgear.metrics import rmse
19
-
20
- lgb_params = {'application': 'regression',
21
- 'boosting': 'gbdt',
22
- 'metric': 'rmse',
23
- 'num_leaves': 15,
24
- 'learning_rate': 0.01,
25
- 'bagging_fraction': 0.9,
26
- 'feature_fraction': 0.9,
27
- 'verbosity': -1,
28
- 'seed': 1,
29
- 'lambda_l1': 1,
30
- 'lambda_l2': 1,
31
- 'early_stop': 20,
32
- 'verbose_eval': 10,
33
- 'num_rounds': 500,
34
- 'num_threads': 3}
35
-
36
- results = run_cv_model(train, test, target, runLGB, lgb_params, rmse)
37
- ```
38
-
39
- Platform: UNKNOWN
40
- Classifier: Development Status :: 3 - Alpha
41
- Classifier: Programming Language :: Python :: 3
42
- Classifier: License :: OSI Approved :: MIT License
43
- Classifier: Operating System :: OS Independent
44
- Description-Content-Type: text/markdown
mlgear-0.3/mlgear/cv.py DELETED
@@ -1,88 +0,0 @@
1
- import numpy as np
2
- import pandas as pd
3
-
4
- from sklearn.model_selection import KFold
5
-
6
-
7
- def run_cv_model(train, test=None, target=None, model_fn=None, params={}, eval_fn=None, label='model', n_folds=5, fold_splits=None, classes=1, stop_on_fold=None, train_on_full=False):
8
- if target is None:
9
- raise ValueError('Target is needed.')
10
- if model_fn is None:
11
- raise ValueError('model function is needed.')
12
- if not fold_splits:
13
- kf = KFold(n_splits=n_folds, random_state=42, shuffle=True)
14
- fold_splits = kf.split(train)
15
- cv_scores = []
16
- models = {}
17
- if classes > 1 and test is not None:
18
- pred_full_test = np.zeros((test.shape[0], classes))
19
- else:
20
- pred_full_test = 0
21
- if classes > 1:
22
- pred_train = np.zeros((train.shape[0], classes))
23
- else:
24
- pred_train = np.zeros(train.shape[0])
25
- feature_importance_df = pd.DataFrame()
26
- i = 1
27
- for dev_index, val_index in fold_splits:
28
- print('Started ' + label + ' fold ' + str(i) + '/' + str(n_folds))
29
- if isinstance(train, pd.DataFrame):
30
- dev_X, val_X = train.iloc[dev_index], train.iloc[val_index]
31
- else:
32
- dev_X, val_X = train[dev_index], train[val_index]
33
- dev_y, val_y = target[dev_index], target[val_index]
34
- params2 = params.copy()
35
- meta = {'dev_index': dev_index,
36
- 'val_index': val_index,
37
- 'fold': i,
38
- 'label': label}
39
- pred_val_y, pred_test_y, importances, model = model_fn(dev_X, dev_y, val_X, val_y, test, params2, meta)
40
- if test is not None:
41
- pred_full_test = pred_full_test + pred_test_y
42
- pred_train[val_index] = pred_val_y
43
- if eval_fn is not None:
44
- cv_score = eval_fn(val_y, pred_val_y)
45
- cv_scores.append(cv_score)
46
- print(label + ' cv score {}: {}'.format(i, cv_score))
47
- models[i] = model
48
- if importances is not None and isinstance(train, pd.DataFrame):
49
- fold_importance_df = pd.DataFrame()
50
- fold_importance_df['feature'] = train.columns.values
51
- fold_importance_df['importance'] = importances
52
- fold_importance_df['fold'] = i
53
- feature_importance_df = pd.concat([feature_importance_df, fold_importance_df], axis=0)
54
- if stop_on_fold and stop_on_fold == i:
55
- results = {'label': label,
56
- 'train': pred_train,
57
- 'cv': cv_scores,
58
- 'importance': feature_importance_df,
59
- 'model': models}
60
- if test is not None:
61
- results['test'] = pred_full_test
62
- return results
63
- i += 1
64
-
65
- if train_on_full:
66
- print('## Training on full ##')
67
- params2 = params.copy()
68
- _, pred_full_test, importances, model = model_fn(train, target, None, None, test, params2)
69
- models['full'] = model
70
- elif test is not None:
71
- pred_full_test = pred_full_test / n_folds
72
-
73
- final_cv = eval_fn(target, pred_train) if eval_fn else None
74
-
75
- print('{} cv scores : {}'.format(label, cv_scores))
76
- print('{} cv mean score : {}'.format(label, np.mean(cv_scores)))
77
- print('{} cv total score : {}'.format(label, final_cv))
78
- print('{} cv std score : {}'.format(label, np.std(cv_scores)))
79
-
80
- results = {'label': label,
81
- 'train': pred_train,
82
- 'cv': cv_scores,
83
- 'final_cv': final_cv,
84
- 'importance': feature_importance_df,
85
- 'model': models}
86
- if test is not None:
87
- results['test'] = pred_full_test
88
- return results
@@ -1,44 +0,0 @@
1
- Metadata-Version: 2.1
2
- Name: mlgear
3
- Version: 0.3
4
- Summary: Utility scripts for machine learning
5
- Home-page: https://github.com/peterhurford/mlgear
6
- Author: Peter Hurford
7
- Author-email: peter@peterhurford.com
8
- License: UNKNOWN
9
- Description: ## MLGear
10
-
11
- Some utility functions to make ML with Python / Pandas / sklearn even easier
12
-
13
- #### Example Usage
14
-
15
- ```Python
16
- from mlgear.cv import run_cv_model
17
- from mlgear.models import runLGB
18
- from mlgear.metrics import rmse
19
-
20
- lgb_params = {'application': 'regression',
21
- 'boosting': 'gbdt',
22
- 'metric': 'rmse',
23
- 'num_leaves': 15,
24
- 'learning_rate': 0.01,
25
- 'bagging_fraction': 0.9,
26
- 'feature_fraction': 0.9,
27
- 'verbosity': -1,
28
- 'seed': 1,
29
- 'lambda_l1': 1,
30
- 'lambda_l2': 1,
31
- 'early_stop': 20,
32
- 'verbose_eval': 10,
33
- 'num_rounds': 500,
34
- 'num_threads': 3}
35
-
36
- results = run_cv_model(train, test, target, runLGB, lgb_params, rmse)
37
- ```
38
-
39
- Platform: UNKNOWN
40
- Classifier: Development Status :: 3 - Alpha
41
- Classifier: Programming Language :: Python :: 3
42
- Classifier: License :: OSI Approved :: MIT License
43
- Classifier: Operating System :: OS Independent
44
- Description-Content-Type: text/markdown
@@ -1,16 +0,0 @@
1
- README.md
2
- setup.cfg
3
- setup.py
4
- mlgear/__init__.py
5
- mlgear/aggregators.py
6
- mlgear/cv.py
7
- mlgear/encoders.py
8
- mlgear/lr_scheduler.py
9
- mlgear/metrics.py
10
- mlgear/models.py
11
- mlgear/tracker.py
12
- mlgear/utils.py
13
- mlgear.egg-info/PKG-INFO
14
- mlgear.egg-info/SOURCES.txt
15
- mlgear.egg-info/dependency_links.txt
16
- mlgear.egg-info/top_level.txt
@@ -1 +0,0 @@
1
- mlgear
mlgear-0.3/setup.cfg DELETED
@@ -1,7 +0,0 @@
1
- [flake8]
2
- max-line-length = 100
3
-
4
- [egg_info]
5
- tag_build =
6
- tag_date = 0
7
-
mlgear-0.3/setup.py DELETED
@@ -1,22 +0,0 @@
1
- import setuptools
2
-
3
- with open('README.md', 'r') as fh:
4
- long_description = fh.read()
5
-
6
- setuptools.setup(
7
- name='mlgear',
8
- version='0.3',
9
- author='Peter Hurford',
10
- author_email='peter@peterhurford.com',
11
- description='Utility scripts for machine learning',
12
- long_description=long_description,
13
- long_description_content_type='text/markdown',
14
- url='https://github.com/peterhurford/mlgear',
15
- packages=setuptools.find_packages(),
16
- classifiers=[
17
- 'Development Status :: 3 - Alpha',
18
- 'Programming Language :: Python :: 3',
19
- 'License :: OSI Approved :: MIT License',
20
- 'Operating System :: OS Independent',
21
- ],
22
- )
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes