mlgear 0.4__py3-none-any.whl → 0.5__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the respective public registry.
mlgear/cv.py CHANGED
@@ -51,7 +51,10 @@ def run_cv_model(train, test=None, target=None, model_fn=None, params={}, eval_f
51
51
  models[i] = model
52
52
  if importances is not None and isinstance(train, pd.DataFrame):
53
53
  fold_importance_df = pd.DataFrame()
54
- fold_importance_df['feature'] = train.columns.values
54
+ if params.get('group') is None:
55
+ fold_importance_df['feature'] = train.columns.values
56
+ else:
57
+ fold_importance_df['feature'] = [c for c in train.columns.values if c != params['group']]
55
58
  fold_importance_df['importance'] = importances
56
59
  fold_importance_df['fold'] = i
57
60
  feature_importance_df = pd.concat([feature_importance_df, fold_importance_df], axis=0)
mlgear/models.py CHANGED
@@ -11,16 +11,13 @@ from mlgear.utils import print_step
11
11
  def runLGB(train_X, train_y, test_X=None, test_y=None, test_X2=None, params={}, meta=None, verbose=True):
12
12
  if verbose:
13
13
  print_step('Prep LGB')
14
- d_train = lgb.Dataset(train_X, label=train_y)
15
- if test_X is not None:
16
- d_valid = lgb.Dataset(test_X, label=test_y)
17
- watchlist = [d_train, d_valid]
14
+
15
+ if params.get('group'):
16
+ group = params.pop('group')
18
17
  else:
19
- watchlist = [d_train]
20
- if verbose:
21
- print_step('Train LGB')
18
+ group = None
19
+
22
20
  num_rounds = params.pop('num_rounds')
23
- verbose_eval = params.pop('verbose_eval')
24
21
  early_stop = None
25
22
  if params.get('early_stop'):
26
23
  early_stop = params.pop('early_stop')
@@ -37,6 +34,31 @@ def runLGB(train_X, train_y, test_X=None, test_y=None, test_X2=None, params={},
37
34
  else:
38
35
  feval = None
39
36
 
37
+ if group is None:
38
+ d_train = lgb.Dataset(train_X, label=train_y)
39
+ else:
40
+ d_train = lgb.Dataset(train_X.drop(group, axis=1),
41
+ label=train_y,
42
+ group=train_X.groupby(group).size().to_numpy())
43
+
44
+ if test_X is not None:
45
+ if group is None:
46
+ d_valid = lgb.Dataset(test_X, label=test_y)
47
+ else:
48
+ d_valid = lgb.Dataset(test_X.drop(group, axis=1),
49
+ label=test_y,
50
+ group=test_X.groupby(group).size().to_numpy())
51
+ test_X = test_X.drop(group, axis=1)
52
+ watchlist = [d_train, d_valid]
53
+ else:
54
+ watchlist = [d_train]
55
+
56
+ if test_X2 is not None and group is not None:
57
+ test_X2 = test_X2.drop(group, axis=1)
58
+
59
+ if verbose:
60
+ print_step('Train LGB')
61
+
40
62
  preds_test_y = []
41
63
  preds_test_y2 = []
42
64
  for b in range(nbag):
@@ -45,9 +67,7 @@ def runLGB(train_X, train_y, test_X=None, test_y=None, test_X2=None, params={},
45
67
  train_set=d_train,
46
68
  num_boost_round=num_rounds,
47
69
  valid_sets=watchlist,
48
- verbose_eval=verbose_eval,
49
- early_stopping_rounds=early_stop,
50
- categorical_feature=cat_cols,
70
+ callbacks=[lgb.early_stopping(stopping_rounds=early_stop)] if early_stop else [],
51
71
  feval=feval)
52
72
  if test_X is not None:
53
73
  if verbose:
@@ -75,8 +95,7 @@ def get_lgb_feature_importance(train, target, params):
75
95
  train_d = lgb.Dataset(train, label=target)
76
96
  lgb_params2 = params.copy()
77
97
  rounds = lgb_params2.pop('num_rounds', 400)
78
- verbose_eval = lgb_params2.pop('verbose_eval', 100)
79
- model = lgb.train(lgb_params2, train_d, rounds, valid_sets = [train_d], verbose_eval=verbose_eval)
98
+ model = lgb.train(lgb_params2, train_d, rounds, valid_sets = [train_d])
80
99
  feature_df = pd.DataFrame(sorted(zip(model.feature_importance(), train.columns)),
81
100
  columns=['Value', 'Feature']).sort_values('Value', ascending=False)
82
101
  return feature_df
mlgear/utils.py CHANGED
@@ -14,7 +14,7 @@ def show(df, max_rows=10, max_cols=None, digits=6):
14
14
 
15
15
 
16
16
  def display_column(df, var):
17
- if df[var].nunique() > 9 and (df[var].dtype == int or df[var].dtype == float):
17
+ if df[var].astype(str).nunique() > 9 and (df[var].dtype == int or df[var].dtype == float):
18
18
  print('Mean: {} Median: {} SD: {}'.format(df[var].mean(), df[var].median(), df[var].std()))
19
19
  else:
20
20
  print(df[var].value_counts(normalize=True) * 100)
@@ -36,3 +36,7 @@ def chunk(l, n):
36
36
  for i in range(0, len(l), n):
37
37
  out.append(l[i:i + n])
38
38
  return out
39
+
40
+
41
+ def min_max(dat):
42
+ return (min(dat), max(dat))
@@ -1,16 +1,28 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.3
2
2
  Name: mlgear
3
- Version: 0.4
3
+ Version: 0.5
4
4
  Summary: Utility scripts for machine learning
5
- Home-page: https://github.com/peterhurford/mlgear
5
+ License: MIT
6
6
  Author: Peter Hurford
7
7
  Author-email: peter@peterhurford.com
8
- License: UNKNOWN
9
- Platform: UNKNOWN
8
+ Requires-Python: >=3.7,<4.0
10
9
  Classifier: Development Status :: 3 - Alpha
11
- Classifier: Programming Language :: Python :: 3
12
10
  Classifier: License :: OSI Approved :: MIT License
13
11
  Classifier: Operating System :: OS Independent
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3.7
14
+ Classifier: Programming Language :: Python :: 3.8
15
+ Classifier: Programming Language :: Python :: 3.9
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Programming Language :: Python :: 3.13
20
+ Requires-Dist: keras
21
+ Requires-Dist: lightgbm
22
+ Requires-Dist: numpy
23
+ Requires-Dist: pandas
24
+ Requires-Dist: scikit-learn
25
+ Project-URL: Repository, https://github.com/peterhurford/mlgear
14
26
  Description-Content-Type: text/markdown
15
27
 
16
28
  ## MLGear
@@ -45,6 +57,23 @@ results = run_cv_model(train, test, target, runLGB, lgb_params, rmse)
45
57
 
46
58
  ### Installation
47
59
 
48
- `pip3 install mlgear`
60
+ ```
61
+ pip install mlgear
62
+ ```
63
+
64
+ For development:
49
65
 
66
+ ```
67
+ # Install poetry if you don't have it
68
+ pip install poetry
69
+
70
+ # Install dependencies
71
+ poetry install
72
+
73
+ # Build the package
74
+ poetry build
75
+
76
+ # Publish to PyPI
77
+ poetry publish
78
+ ```
50
79
 
@@ -0,0 +1,13 @@
1
+ mlgear/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ mlgear/aggregators.py,sha256=k_GGL8zuBqpBKPYR_v14SqPcJyAUalHcQN2o7gjApiw,373
3
+ mlgear/cv.py,sha256=Dk1ajLFITWgu5nQP-R1rpT-KQ9oYaQRhgSYqQi0IwX0,4009
4
+ mlgear/encoders.py,sha256=_NRqTNSdCNhT04Odxa9xRQq7nrr9bTFRwskARx1rgnU,5044
5
+ mlgear/lr_scheduler.py,sha256=S7DlTAWTzAUAQbmzm-yWIWI5r49Htz1jRBQ98IQHdgg,5272
6
+ mlgear/metrics.py,sha256=_zQwjz4X3-vUQEqu2yIfq2w2XnuH8YUSD_M_u6szToo,1188
7
+ mlgear/models.py,sha256=RtvmsjMFoecdce_ikj3fa9cukGdvQEj3Y72hz5Qw-fY,6249
8
+ mlgear/tracker.py,sha256=U2OXm9tjAWSc5B5_-oTvj_YAJdpkU4nsmPE7tH8BSN4,447
9
+ mlgear/utils.py,sha256=I72-qBgiisV1hcoUT5almb8GXwfmhTQgwvP6gl8kJEY,1096
10
+ mlgear-0.5.dist-info/LICENSE.txt,sha256=qkKmWAzXQC3lYVyoucB3x4iW2xnGEmaORCB4ADTAik4,1081
11
+ mlgear-0.5.dist-info/METADATA,sha256=sRhbP8yNadrRqDX3mY1wOKpjbWD2DPnLbwmibEFYvYc,1994
12
+ mlgear-0.5.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
13
+ mlgear-0.5.dist-info/RECORD,,
@@ -1,5 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.35.1)
2
+ Generator: poetry-core 2.1.2
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
-
@@ -1,14 +0,0 @@
1
- mlgear/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- mlgear/aggregators.py,sha256=k_GGL8zuBqpBKPYR_v14SqPcJyAUalHcQN2o7gjApiw,373
3
- mlgear/cv.py,sha256=6gHFOVHcisu6qs0mYUcHKmCvP4ygWBUnLmUOWBtBQ08,3837
4
- mlgear/encoders.py,sha256=_NRqTNSdCNhT04Odxa9xRQq7nrr9bTFRwskARx1rgnU,5044
5
- mlgear/lr_scheduler.py,sha256=S7DlTAWTzAUAQbmzm-yWIWI5r49Htz1jRBQ98IQHdgg,5272
6
- mlgear/metrics.py,sha256=_zQwjz4X3-vUQEqu2yIfq2w2XnuH8YUSD_M_u6szToo,1188
7
- mlgear/models.py,sha256=6X2VA7On6ioPFHDbB9YTbO3pXlI71GGJb-PhVK8nddY,5740
8
- mlgear/tracker.py,sha256=U2OXm9tjAWSc5B5_-oTvj_YAJdpkU4nsmPE7tH8BSN4,447
9
- mlgear/utils.py,sha256=E8lb0gsTf4tun7PHUQ5GFxwwxY3ZRxwIMzNCTZCb1rM,1032
10
- mlgear-0.4.dist-info/LICENSE.txt,sha256=qkKmWAzXQC3lYVyoucB3x4iW2xnGEmaORCB4ADTAik4,1081
11
- mlgear-0.4.dist-info/METADATA,sha256=mQSNZAZRZBNO1IU7EiifAN0w4u9ECVVkvKgl2TejG8o,1309
12
- mlgear-0.4.dist-info/WHEEL,sha256=EVRjI69F5qVjm_YgqcTXPnTAv3BfSUr0WVAHuSP3Xoo,92
13
- mlgear-0.4.dist-info/top_level.txt,sha256=TM51_lbw1nIKS5TvY-qVQEBGw1tMrgBKGUcB5BISu-Y,7
14
- mlgear-0.4.dist-info/RECORD,,
@@ -1 +0,0 @@
1
- mlgear