mlgear 0.4__tar.gz → 0.5__tar.gz

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
mlgear-0.5/LICENSE.txt ADDED
@@ -0,0 +1,20 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2020- Peter Hurford
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy of
6
+ this software and associated documentation files (the "Software"), to deal in
7
+ the Software without restriction, including without limitation the rights to
8
+ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
9
+ the Software, and to permit persons to whom the Software is furnished to do so,
10
+ subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
17
+ FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
18
+ COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
19
+ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
mlgear-0.5/PKG-INFO ADDED
@@ -0,0 +1,79 @@
1
+ Metadata-Version: 2.3
2
+ Name: mlgear
3
+ Version: 0.5
4
+ Summary: Utility scripts for machine learning
5
+ License: MIT
6
+ Author: Peter Hurford
7
+ Author-email: peter@peterhurford.com
8
+ Requires-Python: >=3.7,<4.0
9
+ Classifier: Development Status :: 3 - Alpha
10
+ Classifier: License :: OSI Approved :: MIT License
11
+ Classifier: Operating System :: OS Independent
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3.7
14
+ Classifier: Programming Language :: Python :: 3.8
15
+ Classifier: Programming Language :: Python :: 3.9
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Programming Language :: Python :: 3.13
20
+ Requires-Dist: keras
21
+ Requires-Dist: lightgbm
22
+ Requires-Dist: numpy
23
+ Requires-Dist: pandas
24
+ Requires-Dist: scikit-learn
25
+ Project-URL: Repository, https://github.com/peterhurford/mlgear
26
+ Description-Content-Type: text/markdown
27
+
28
+ ## MLGear
29
+
30
+ Some utility functions to make ML with Python / Pandas / sklearn even easier
31
+
32
+ ### Example Usage
33
+
34
+ ```Python
35
+ from mlgear.cv import run_cv_model
36
+ from mlgear.models import runLGB
37
+ from mlgear.metrics import rmse
38
+
39
+ lgb_params = {'application': 'regression',
40
+ 'boosting': 'gbdt',
41
+ 'metric': 'rmse',
42
+ 'num_leaves': 15,
43
+ 'learning_rate': 0.01,
44
+ 'bagging_fraction': 0.9,
45
+ 'feature_fraction': 0.9,
46
+ 'verbosity': -1,
47
+ 'seed': 1,
48
+ 'lambda_l1': 1,
49
+ 'lambda_l2': 1,
50
+ 'early_stop': 20,
51
+ 'verbose_eval': 10,
52
+ 'num_rounds': 500,
53
+ 'num_threads': 3}
54
+
55
+ results = run_cv_model(train, test, target, runLGB, lgb_params, rmse)
56
+ ```
57
+
58
+ ### Installation
59
+
60
+ ```
61
+ pip install mlgear
62
+ ```
63
+
64
+ For development:
65
+
66
+ ```
67
+ # Install poetry if you don't have it
68
+ pip install poetry
69
+
70
+ # Install dependencies
71
+ poetry install
72
+
73
+ # Build the package
74
+ poetry build
75
+
76
+ # Publish to PyPI
77
+ poetry publish
78
+ ```
79
+
@@ -30,4 +30,22 @@ results = run_cv_model(train, test, target, runLGB, lgb_params, rmse)
30
30
 
31
31
  ### Installation
32
32
 
33
- `pip3 install mlgear`
33
+ ```
34
+ pip install mlgear
35
+ ```
36
+
37
+ For development:
38
+
39
+ ```
40
+ # Install poetry if you don't have it
41
+ pip install poetry
42
+
43
+ # Install dependencies
44
+ poetry install
45
+
46
+ # Build the package
47
+ poetry build
48
+
49
+ # Publish to PyPI
50
+ poetry publish
51
+ ```
@@ -51,7 +51,10 @@ def run_cv_model(train, test=None, target=None, model_fn=None, params={}, eval_f
51
51
  models[i] = model
52
52
  if importances is not None and isinstance(train, pd.DataFrame):
53
53
  fold_importance_df = pd.DataFrame()
54
- fold_importance_df['feature'] = train.columns.values
54
+ if params.get('group') is None:
55
+ fold_importance_df['feature'] = train.columns.values
56
+ else:
57
+ fold_importance_df['feature'] = [c for c in train.columns.values if c != params['group']]
55
58
  fold_importance_df['importance'] = importances
56
59
  fold_importance_df['fold'] = i
57
60
  feature_importance_df = pd.concat([feature_importance_df, fold_importance_df], axis=0)
@@ -11,16 +11,13 @@ from mlgear.utils import print_step
11
11
  def runLGB(train_X, train_y, test_X=None, test_y=None, test_X2=None, params={}, meta=None, verbose=True):
12
12
  if verbose:
13
13
  print_step('Prep LGB')
14
- d_train = lgb.Dataset(train_X, label=train_y)
15
- if test_X is not None:
16
- d_valid = lgb.Dataset(test_X, label=test_y)
17
- watchlist = [d_train, d_valid]
14
+
15
+ if params.get('group'):
16
+ group = params.pop('group')
18
17
  else:
19
- watchlist = [d_train]
20
- if verbose:
21
- print_step('Train LGB')
18
+ group = None
19
+
22
20
  num_rounds = params.pop('num_rounds')
23
- verbose_eval = params.pop('verbose_eval')
24
21
  early_stop = None
25
22
  if params.get('early_stop'):
26
23
  early_stop = params.pop('early_stop')
@@ -37,6 +34,31 @@ def runLGB(train_X, train_y, test_X=None, test_y=None, test_X2=None, params={},
37
34
  else:
38
35
  feval = None
39
36
 
37
+ if group is None:
38
+ d_train = lgb.Dataset(train_X, label=train_y)
39
+ else:
40
+ d_train = lgb.Dataset(train_X.drop(group, axis=1),
41
+ label=train_y,
42
+ group=train_X.groupby(group).size().to_numpy())
43
+
44
+ if test_X is not None:
45
+ if group is None:
46
+ d_valid = lgb.Dataset(test_X, label=test_y)
47
+ else:
48
+ d_valid = lgb.Dataset(test_X.drop(group, axis=1),
49
+ label=test_y,
50
+ group=test_X.groupby(group).size().to_numpy())
51
+ test_X = test_X.drop(group, axis=1)
52
+ watchlist = [d_train, d_valid]
53
+ else:
54
+ watchlist = [d_train]
55
+
56
+ if test_X2 is not None and group is not None:
57
+ test_X2 = test_X2.drop(group, axis=1)
58
+
59
+ if verbose:
60
+ print_step('Train LGB')
61
+
40
62
  preds_test_y = []
41
63
  preds_test_y2 = []
42
64
  for b in range(nbag):
@@ -45,9 +67,7 @@ def runLGB(train_X, train_y, test_X=None, test_y=None, test_X2=None, params={},
45
67
  train_set=d_train,
46
68
  num_boost_round=num_rounds,
47
69
  valid_sets=watchlist,
48
- verbose_eval=verbose_eval,
49
- early_stopping_rounds=early_stop,
50
- categorical_feature=cat_cols,
70
+ callbacks=[lgb.early_stopping(stopping_rounds=early_stop)] if early_stop else [],
51
71
  feval=feval)
52
72
  if test_X is not None:
53
73
  if verbose:
@@ -75,8 +95,7 @@ def get_lgb_feature_importance(train, target, params):
75
95
  train_d = lgb.Dataset(train, label=target)
76
96
  lgb_params2 = params.copy()
77
97
  rounds = lgb_params2.pop('num_rounds', 400)
78
- verbose_eval = lgb_params2.pop('verbose_eval', 100)
79
- model = lgb.train(lgb_params2, train_d, rounds, valid_sets = [train_d], verbose_eval=verbose_eval)
98
+ model = lgb.train(lgb_params2, train_d, rounds, valid_sets = [train_d])
80
99
  feature_df = pd.DataFrame(sorted(zip(model.feature_importance(), train.columns)),
81
100
  columns=['Value', 'Feature']).sort_values('Value', ascending=False)
82
101
  return feature_df
@@ -14,7 +14,7 @@ def show(df, max_rows=10, max_cols=None, digits=6):
14
14
 
15
15
 
16
16
  def display_column(df, var):
17
- if df[var].nunique() > 9 and (df[var].dtype == int or df[var].dtype == float):
17
+ if df[var].astype(str).nunique() > 9 and (df[var].dtype == int or df[var].dtype == float):
18
18
  print('Mean: {} Median: {} SD: {}'.format(df[var].mean(), df[var].median(), df[var].std()))
19
19
  else:
20
20
  print(df[var].value_counts(normalize=True) * 100)
@@ -36,3 +36,7 @@ def chunk(l, n):
36
36
  for i in range(0, len(l), n):
37
37
  out.append(l[i:i + n])
38
38
  return out
39
+
40
+
41
+ def min_max(dat):
42
+ return (min(dat), max(dat))
mlgear-0.5/pyproject.toml ADDED
@@ -0,0 +1,29 @@
1
+ [tool.poetry]
2
+ name = "mlgear"
3
+ version = "0.5"
4
+ description = "Utility scripts for machine learning"
5
+ authors = ["Peter Hurford <peter@peterhurford.com>"]
6
+ license = "MIT"
7
+ readme = "README.md"
8
+ repository = "https://github.com/peterhurford/mlgear"
9
+ classifiers = [
10
+ "Development Status :: 3 - Alpha",
11
+ "Programming Language :: Python :: 3",
12
+ "License :: OSI Approved :: MIT License",
13
+ "Operating System :: OS Independent",
14
+ ]
15
+
16
+ [tool.poetry.dependencies]
17
+ python = "^3.7"
18
+ keras = "*"
19
+ lightgbm = "*"
20
+ numpy = "*"
21
+ pandas = "*"
22
+ scikit-learn = "*"
23
+
24
+ [tool.poetry.group.dev.dependencies]
25
+ flake8 = "*"
26
+
27
+ [build-system]
28
+ requires = ["poetry-core>=1.0.0"]
29
+ build-backend = "poetry.core.masonry.api"
mlgear-0.4/PKG-INFO DELETED
@@ -1,48 +0,0 @@
1
- Metadata-Version: 2.1
2
- Name: mlgear
3
- Version: 0.4
4
- Summary: Utility scripts for machine learning
5
- Home-page: https://github.com/peterhurford/mlgear
6
- Author: Peter Hurford
7
- Author-email: peter@peterhurford.com
8
- License: UNKNOWN
9
- Description: ## MLGear
10
-
11
- Some utility functions to make ML with Python / Pandas / sklearn even easier
12
-
13
- ### Example Usage
14
-
15
- ```Python
16
- from mlgear.cv import run_cv_model
17
- from mlgear.models import runLGB
18
- from mlgear.metrics import rmse
19
-
20
- lgb_params = {'application': 'regression',
21
- 'boosting': 'gbdt',
22
- 'metric': 'rmse',
23
- 'num_leaves': 15,
24
- 'learning_rate': 0.01,
25
- 'bagging_fraction': 0.9,
26
- 'feature_fraction': 0.9,
27
- 'verbosity': -1,
28
- 'seed': 1,
29
- 'lambda_l1': 1,
30
- 'lambda_l2': 1,
31
- 'early_stop': 20,
32
- 'verbose_eval': 10,
33
- 'num_rounds': 500,
34
- 'num_threads': 3}
35
-
36
- results = run_cv_model(train, test, target, runLGB, lgb_params, rmse)
37
- ```
38
-
39
- ### Installation
40
-
41
- `pip3 install mlgear`
42
-
43
- Platform: UNKNOWN
44
- Classifier: Development Status :: 3 - Alpha
45
- Classifier: Programming Language :: Python :: 3
46
- Classifier: License :: OSI Approved :: MIT License
47
- Classifier: Operating System :: OS Independent
48
- Description-Content-Type: text/markdown
@@ -1,48 +0,0 @@
1
- Metadata-Version: 2.1
2
- Name: mlgear
3
- Version: 0.4
4
- Summary: Utility scripts for machine learning
5
- Home-page: https://github.com/peterhurford/mlgear
6
- Author: Peter Hurford
7
- Author-email: peter@peterhurford.com
8
- License: UNKNOWN
9
- Description: ## MLGear
10
-
11
- Some utility functions to make ML with Python / Pandas / sklearn even easier
12
-
13
- ### Example Usage
14
-
15
- ```Python
16
- from mlgear.cv import run_cv_model
17
- from mlgear.models import runLGB
18
- from mlgear.metrics import rmse
19
-
20
- lgb_params = {'application': 'regression',
21
- 'boosting': 'gbdt',
22
- 'metric': 'rmse',
23
- 'num_leaves': 15,
24
- 'learning_rate': 0.01,
25
- 'bagging_fraction': 0.9,
26
- 'feature_fraction': 0.9,
27
- 'verbosity': -1,
28
- 'seed': 1,
29
- 'lambda_l1': 1,
30
- 'lambda_l2': 1,
31
- 'early_stop': 20,
32
- 'verbose_eval': 10,
33
- 'num_rounds': 500,
34
- 'num_threads': 3}
35
-
36
- results = run_cv_model(train, test, target, runLGB, lgb_params, rmse)
37
- ```
38
-
39
- ### Installation
40
-
41
- `pip3 install mlgear`
42
-
43
- Platform: UNKNOWN
44
- Classifier: Development Status :: 3 - Alpha
45
- Classifier: Programming Language :: Python :: 3
46
- Classifier: License :: OSI Approved :: MIT License
47
- Classifier: Operating System :: OS Independent
48
- Description-Content-Type: text/markdown
@@ -1,16 +0,0 @@
1
- README.md
2
- setup.cfg
3
- setup.py
4
- mlgear/__init__.py
5
- mlgear/aggregators.py
6
- mlgear/cv.py
7
- mlgear/encoders.py
8
- mlgear/lr_scheduler.py
9
- mlgear/metrics.py
10
- mlgear/models.py
11
- mlgear/tracker.py
12
- mlgear/utils.py
13
- mlgear.egg-info/PKG-INFO
14
- mlgear.egg-info/SOURCES.txt
15
- mlgear.egg-info/dependency_links.txt
16
- mlgear.egg-info/top_level.txt
@@ -1 +0,0 @@
1
- mlgear
mlgear-0.4/setup.cfg DELETED
@@ -1,7 +0,0 @@
1
- [flake8]
2
- max-line-length = 100
3
-
4
- [egg_info]
5
- tag_build =
6
- tag_date = 0
7
-
mlgear-0.4/setup.py DELETED
@@ -1,22 +0,0 @@
1
- import setuptools
2
-
3
- with open('README.md', 'r') as fh:
4
- long_description = fh.read()
5
-
6
- setuptools.setup(
7
- name='mlgear',
8
- version='0.4',
9
- author='Peter Hurford',
10
- author_email='peter@peterhurford.com',
11
- description='Utility scripts for machine learning',
12
- long_description=long_description,
13
- long_description_content_type='text/markdown',
14
- url='https://github.com/peterhurford/mlgear',
15
- packages=setuptools.find_packages(),
16
- classifiers=[
17
- 'Development Status :: 3 - Alpha',
18
- 'Programming Language :: Python :: 3',
19
- 'License :: OSI Approved :: MIT License',
20
- 'Operating System :: OS Independent',
21
- ],
22
- )
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes