icol-0.1.6-py3-none-any.whl → icol-0.7.4-py3-none-any.whl
- icol/icol.py +478 -149
- {icol-0.1.6.dist-info → icol-0.7.4.dist-info}/METADATA +2 -2
- icol-0.7.4.dist-info/RECORD +7 -0
- icol-0.1.6.dist-info/RECORD +0 -7
- {icol-0.1.6.dist-info → icol-0.7.4.dist-info}/LICENSE +0 -0
- {icol-0.1.6.dist-info → icol-0.7.4.dist-info}/WHEEL +0 -0
- {icol-0.1.6.dist-info → icol-0.7.4.dist-info}/top_level.txt +0 -0
icol/icol.py
CHANGED
@@ -8,9 +8,10 @@ from itertools import combinations, permutations
 import numpy as np
 import sympy as sp
 
-from sklearn.linear_model import lars_path
+from sklearn.linear_model import lars_path, Ridge, Lars
 from sklearn.preprocessing import PolynomialFeatures
 from sklearn.base import clone
+from sklearn.model_selection import train_test_split
 
 from sklearn.metrics import mean_squared_error
 
@@ -21,10 +22,135 @@ def LL(res):
     n = len(res)
     return n*np.log(np.sum(res**2)/n)
 
+def initialize_ols(D, y, init_idx):
+    """
+    Fit initial OLS solution on selected columns of D.
+
+    Parameters
+    ----------
+    D : (n, d) ndarray
+        Full dictionary matrix.
+    y : (n,) ndarray
+        Response vector.
+    init_idx : list[int]
+        Indices of columns from D to use initially.
+
+    Returns
+    -------
+    beta : (p,) ndarray
+        OLS coefficients for selected columns.
+    A_inv : (p, p) ndarray
+        Inverse Gram matrix for selected columns.
+    XT : (p, n) ndarray
+        Transposed design matrix of selected columns.
+    active_idx : list[int]
+        Current indices of D included in the model.
+    """
+    X = D[:, init_idx]
+    A = X.T @ X
+    try:
+        A_inv = np.linalg.inv(A)
+    except np.linalg.LinAlgError:
+        A_inv = np.linalg.pinv(A)
+    beta = A_inv @ (X.T @ y)
+    XT = X.T
+    return beta, A_inv, XT, list(init_idx)
+
+def sweep_update_from_D(beta, A_inv, XT, active_idx, D, y, new_idx):
+    # Generated with ChatGPT using the commands;
+    # 1. write me a function which takes in an n by p dimension matrix X, for which we already have an OLS solution, beta.
+    #    Additionally, a second input is a data matrix Z with n rows and q columns.
+    #    Add the Z matrix of columns to the OLS solution using SWEEP
+    # 2. Are we also able to efficiently update the gram and its inverse with this procedure for X augmented with Z
+    # 3. Ok, imagine that I need to update my SWEEP solution multiple times.
+    #    Adjust the inputs and return values so that everything can be used again in the next SWEEP update.
+    #    Then update the function to make use of these previous computations
+    # 4. Lets make some changes for the sake of indexing. Imagine that we have a large matrix D, with d columns.
+    #    Through some selection procedure we select p of those columns to form an initial OLS solution.
+    #    We then iteratively select p new columns and incorporate those into the ols solution using sweep.
+    #    Update the code to reflect this change while also tracking the indices of columns in the original D matrix
+    #    and their mapping to the respective betas.
+
+    """
+    Update OLS solution by adding new columns from D.
+
+    Parameters
+    ----------
+    beta : (p,) ndarray
+        Current OLS coefficients.
+    A_inv : (p, p) ndarray
+        Inverse Gram matrix for current features.
+    XT : (p, n) ndarray
+        Transposed design matrix for current features.
+    active_idx : list[int]
+        Current indices of columns in D that are in the model.
+    D : (n, d) ndarray
+        Full dictionary matrix.
+    y : (n,) ndarray
+        Response vector.
+    new_idx : list[int]
+        Indices of new columns in D to add.
+
+    Returns
+    -------
+    beta_new : (p+q,) ndarray
+        Updated OLS coefficients.
+    A_tilde_inv : (p+q, p+q) ndarray
+        Updated inverse Gram matrix.
+    XT_new : (p+q, n) ndarray
+        Updated design matrix transpose.
+    active_idx_new : list[int]
+        Updated indices of active columns in D.
+    """
+    p = beta.shape[0]
+    Z = D[:, new_idx]  # n x q
+    q = Z.shape[1]
+
+    # Cross products
+    B = XT @ Z    # p x q
+    C = Z.T @ Z   # q x q
+    yZ = Z.T @ y  # q x 1
+
+    # Schur complement
+    S = C - B.T @ (A_inv @ B)
+
+    # Solve for new coefficients (numerically stable)
+    rhs = yZ - B.T @ beta
+    try:
+        beta_Z = np.linalg.solve(S, rhs)
+    except np.linalg.LinAlgError:
+        beta_Z = np.linalg.pinv(S) @ rhs
+
+    # Update old coefficients
+    beta_X_new = beta - A_inv @ (B @ beta_Z)
+    beta_new = np.concatenate([beta_X_new, beta_Z])
+
+    # Update Gram inverse
+    try:
+        S_inv = np.linalg.inv(S)  # small q x q
+    except np.linalg.LinAlgError:
+        S_inv = np.linalg.pinv(S)
+
+    top_left = A_inv + A_inv @ B @ S_inv @ B.T @ A_inv
+    top_right = -A_inv @ B @ S_inv
+    bottom_left = -S_inv @ B.T @ A_inv
+    bottom_right = S_inv
+
+    A_tilde_inv = np.block([
+        [top_left, top_right],
+        [bottom_left, bottom_right]
+    ])
+
+    # Update XT and active indices
+    XT_new = np.vstack([XT, Z.T])
+    active_idx_new = active_idx + list(new_idx)
+
+    return beta_new, A_tilde_inv, XT_new, active_idx_new
+
 IC_DICT = {
     'AIC': lambda res, k: LL(res) + 2*k,
     'HQIC': lambda res, k: LL(res) + np.log(np.log(len(res)))*k,
-    '
+    'BIC': lambda res, k, n: LL(res) + 2*k*np.log(n),
     'CAIC': lambda res, k: LL(res) + (np.log(len(res))+1)*k,
     'AICc': lambda res, k: LL(res) + 2*k + 2*k*(k+1)/(len(res)-k-1)
 }
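The new `initialize_ols`/`sweep_update_from_D` pair maintains an OLS fit incrementally via the Schur complement of the augmented Gram matrix, so adding a batch of `q` columns costs only a small `q x q` solve instead of refactoring the whole design. A minimal sanity-check sketch (the import path and data are illustrative, not package output):

```python
import numpy as np
from icol.icol import initialize_ols, sweep_update_from_D  # assumed import path

# Growing the active set with sweep_update_from_D should reproduce a
# direct least-squares fit on the union of the selected columns.
rng = np.random.default_rng(0)
D = rng.normal(size=(50, 6))
y = rng.normal(size=50)

beta, A_inv, XT, active = initialize_ols(D, y, init_idx=[0, 1])
beta, A_inv, XT, active = sweep_update_from_D(beta, A_inv, XT, active, D, y, new_idx=[2, 3])

beta_direct, *_ = np.linalg.lstsq(D[:, active], y, rcond=None)
assert np.allclose(beta, beta_direct)  # both solve OLS on columns [0, 1, 2, 3]
```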
@@ -168,7 +294,7 @@ class PolynomialFeaturesICL:
 
     def get_feature_names_out(self):
         return self.PolynomialFeatures.get_feature_names_out()
-
+
 class BSS:
     def __init__(self):
         pass
@@ -232,13 +358,97 @@ class BSS:
         beta_ret = np.zeros(p)
         beta_ret[list(best_comb)] = beta.reshape(1, -1)
         return beta_ret
-
+
+class EfficientAdaptiveLASSO:
+    def __init__(self, gamma=1, fit_intercept=False, default_d=5, rcond=-1, alpha=0):
+        self.gamma = gamma
+        self.fit_intercept = fit_intercept
+        self.default_d = default_d
+        self.rcond=rcond
+        self.alpha=alpha
+        self.A_inv = None
+        self.XT = None
+        self.beta_ols = None
+        self.active_idx = None
+
+    def __str__(self):
+        return ('EffAda' if self.gamma != 0 else '') + ('LASSO') + ('(gamma={0})'.format(self.gamma) if self.gamma != 0 else '')
+
+    def __repr__(self):
+        return self.__str__()
+
+    def get_params(self, deep=False):
+        return {'gamma': self.gamma,
+                'fit_intercept': self.fit_intercept,
+                'default_d': self.default_d,
+                'rcond': self.rcond}
+
+    def set_default_d(self, d):
+        self.default_d = d
+
+    def __call__(self, X, y, d, idx_old = None, idx_new=None, verbose=False):
+
+        self.set_default_d(d)
+        nonancols = np.isnan(X).sum(axis=0)==0
+        noinfcols = np.isinf(X).sum(axis=0)==0
+        valcols = np.logical_and(nonancols, noinfcols)
+        idx_ala = list(idx_new) + list(idx_old)
+
+        if np.abs(self.gamma)<1e-10:
+            beta_ols = np.ones(X.shape[1])
+            w_hat = np.ones(X.shape[1])
+            X_star_star = X.copy()
+        else:
+            X_valcols = X[:, valcols]
+            if not idx_old:
+                self.beta_ols, self.A_inv, self.XT, self.active_idx = initialize_ols(X_valcols, y, init_idx=idx_new)
+            else:
+                self.beta_ols, self.A_inv, self.XT, self.active_idx = sweep_update_from_D(beta = self.beta_ols, A_inv=self.A_inv,
+                                                                                          XT=self.XT, active_idx=self.active_idx, D=X, y=y,
+                                                                                          new_idx=idx_new)
+
+            w_hat = 1/np.power(np.abs(self.beta_ols), self.gamma)
+            X_star_star = np.zeros_like(X_valcols[:, idx_ala])
+            for j in range(X_star_star.shape[1]): # vectorise
+                X_j = X_valcols[:, j]/w_hat[j]
+                X_star_star[:, j] = X_j
+
+        _, _, coefs, _ = lars_path(X_star_star, y.ravel(), return_n_iter=True, max_iter=d, method='lasso')
+        # alphas, active, coefs = lars_path(X_star_star, y.ravel(), method='lasso')
+        try:
+            beta_hat_star_star = coefs[:, d]
+        except IndexError: # in the event that a solution with d components cant be found, use the next largest.
+            beta_hat_star_star = coefs[:, -1]
+
+        beta_hat_star_n_old_new = np.array([beta_hat_star_star[j]/w_hat[j] for j in range(len(beta_hat_star_star))])
+        # beta_hat_star_n = np.zeros(X.shape[1])
+        # beta_hat_star_n[idx_ala] = beta_hat_star_n_old_new
+
+        # beta_hat_star_n[valcols] = beta_hat_star_n_valcol
+        # ret = beta_hat_star_n.reshape(1, -1).squeeze()
+        return beta_hat_star_n_old_new.squeeze()
+
+    def fit(self, X, y, verbose=False):
+        self.mu = y.mean() if self.fit_intercept else 0
+        beta = self.__call__(X=X, y=y-self.mu, d=self.default_d, verbose=verbose)
+        self.beta = beta.reshape(-1, 1)
+
+    def predict(self, X):
+        return np.dot(X, self.beta) + self.mu
+
+    def s_max(self, k, n, p, c1=1, c0=0):
+        if self.gamma==0:
+            return c1*(p/(k**2)) + c0
+        else:
+            return c1*min(np.power(p, 1/2)/k, np.power(p*n, 1/3)/k) + c0
+
 class AdaptiveLASSO:
-    def __init__(self, gamma=1, fit_intercept=False, default_d=5, rcond=-1):
+    def __init__(self, gamma=1, fit_intercept=False, default_d=5, rcond=-1, alpha=0):
         self.gamma = gamma
         self.fit_intercept = fit_intercept
         self.default_d = default_d
         self.rcond=rcond
+        self.alpha=0
 
     def __str__(self):
         return ('Ada' if self.gamma != 0 else '') + ('LASSO') + ('(gamma={0})'.format(self.gamma) if self.gamma != 0 else '')
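Both `AdaptiveLASSO` and the new `EfficientAdaptiveLASSO` rely on the same reweighting trick: each column is scaled by `w_j = 1/|beta_ols_j|**gamma` before running LARS, and the recovered coefficients are unscaled afterwards, so features with small pilot coefficients are penalized more heavily. A standalone sketch of that idea (not the package API; data and dimensions are illustrative):

```python
import numpy as np
from sklearn.linear_model import lars_path

rng = np.random.default_rng(1)
X = rng.normal(size=(200, 20))
beta_true = np.zeros(20)
beta_true[:3] = [2.0, -1.5, 0.5]
y = X @ beta_true + 0.01 * rng.normal(size=200)

gamma, d = 1.0, 3
beta_ols, *_ = np.linalg.lstsq(X, y, rcond=None)
w = 1 / np.abs(beta_ols) ** gamma       # adaptive weights from the pilot fit
_, _, coefs = lars_path(X / w, y, max_iter=d, method='lasso')
beta_hat = coefs[:, -1] / w             # undo the column scaling
print(np.nonzero(beta_hat)[0])          # expected: [0 1 2]
```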
@@ -255,21 +465,26 @@ class AdaptiveLASSO:
     def set_default_d(self, d):
         self.default_d = d
 
-    def __call__(self, X, y, d,
+    def __call__(self, X, y, d, verbose=False):
 
         self.set_default_d(d)
 
+        nonancols = np.isnan(X).sum(axis=0)==0
+        noinfcols = np.isinf(X).sum(axis=0)==0
+        valcols = np.logical_and(nonancols, noinfcols)
         if np.abs(self.gamma)<1e-10:
             beta_hat = np.ones(X.shape[1])
             w_hat = np.ones(X.shape[1])
             X_star_star = X.copy()
         else:
-
+
+            X_valcols = X[:, valcols]
+            beta_hat, _, _, _ = np.linalg.lstsq(X_valcols, y, rcond=self.rcond)
 
             w_hat = 1/np.power(np.abs(beta_hat), self.gamma)
-            X_star_star = np.zeros_like(
+            X_star_star = np.zeros_like(X_valcols)
             for j in range(X_star_star.shape[1]): # vectorise
-                X_j =
+                X_j = X_valcols[:, j]/w_hat[j]
                 X_star_star[:, j] = X_j
 
         _, _, coefs, _ = lars_path(X_star_star, y.ravel(), return_n_iter=True, max_iter=d, method='lasso')
@@ -278,7 +493,10 @@ class AdaptiveLASSO:
             beta_hat_star_star = coefs[:, d]
         except IndexError:
             beta_hat_star_star = coefs[:, -1]
-
+
+        beta_hat_star_n_valcol = np.array([beta_hat_star_star[j]/w_hat[j] for j in range(len(beta_hat_star_star))])
+        beta_hat_star_n = np.zeros(X.shape[1])
+        beta_hat_star_n[valcols] = beta_hat_star_n_valcol
         return beta_hat_star_n.reshape(1, -1).squeeze()
 
     def fit(self, X, y, verbose=False):
@@ -295,6 +513,27 @@ class AdaptiveLASSO:
         else:
             return c1*min(np.power(p, 1/2)/k, np.power(p*n, 1/3)/k) + c0
 
+class LARS:
+    def __init__(self, default_d=None):
+        self.default_d=default_d
+
+    def __repr__(self):
+        return 'Lars'
+
+    def __str__(self):
+        return 'Lars'
+
+    def set_default_d(self, default_d):
+        self.default_d = default_d
+
+    def get_params(self, deep=False):
+        return {'default_d': self.default_d}
+
+    def __call__(self, X, y, d, verbose=False):
+        self.lars = Lars(fit_intercept=False, fit_path=False, verbose=verbose, n_nonzero_coefs=d, copy_X=True)
+        self.lars.fit(X, y)
+        return self.lars.coef_
+
 class ThresholdedLeastSquares:
     def __init__(self, default_d=None):
         self.default_d=default_d
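The new `LARS` class wraps `sklearn.linear_model.Lars` behind the same callable interface the other sparsifying operators expose: `so(X, y, d)` returns a `d`-sparse coefficient vector. A hypothetical call (data and values are illustrative; the class is assumed to be in scope):

```python
import numpy as np

rng = np.random.default_rng(2)
X = rng.normal(size=(100, 15))
y = 3 * X[:, 4] - 2 * X[:, 9]

so = LARS(default_d=2)
beta = so(X, y, d=2)                 # at most d nonzero coefficients
print(np.nonzero(beta.ravel())[0])   # expected: [4 9]
```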
@@ -368,38 +607,38 @@ class SIS:
         return best_corr, best_idxs
 
 class ICL:
-    def __init__(self, s, so,
+    def __init__(self, s, so, k, fit_intercept=True, normalize=True, pool_reset=False, optimize_k=False, track_intermediates=False):
         self.s = s
         self.sis = SIS(n_sis=s)
         self.so = so
-        self.
+        self.k = k
         self.fit_intercept = fit_intercept
         self.normalize=normalize
         self.pool_reset = pool_reset
-        self.
-
-
+        self.optimize_k = optimize_k
+        self.track_intermediates = track_intermediates
+
     def get_params(self, deep=False):
         return {'s': self.s,
                 'so': self.so,
-                '
+                'k': self.k,
                 'fit_intercept': self.fit_intercept,
                 'normalize': self.normalize,
                 'pool_reset': self.pool_reset,
-                '
+                'self.optimize_k': self.optimize_k
                 }
 
     def __str__(self):
-        return '
+        return 'ICL(n_sis={0}, SO={1}, k={2})'.format(self.s, str(self.so), self.k)
 
     def __repr__(self, prec=3):
         ret = []
         for i, name in enumerate(self.feature_names_sparse_):
-            ret += [('+' if self.coef_[0, i] > 0 else '') +
-
+            ret += [('+' if self.coef_[0, i] > 0 else '') +
+                    str(np.format_float_scientific(self.coef_[0, i], precision=prec, unique=False))
+                    + ' (' + str(name) + ')' + '\n']
+        ret += [('+' if self.intercept_>0 else '') + str(float(np.round(self.intercept_, prec)))]
         return ''.join(ret)
-
-        # return '+'.join(['{0}({1})'.format(str(np.round(b, 3)), self.feature_names_sparse_[i]) for i, b in enumerate(self.coef_) if np.abs(b) > 0]+[str(self.intercept_)])
 
     def solve_norm_coef(self, X, y):
         n, p = X.shape
@@ -440,32 +679,37 @@ class ICL:
 
         return bad_cols
 
-    def fitting(self, X, y, feature_names=None, verbose=False, track_pool=False,
+    def fitting(self, X, y, feature_names=None, verbose=False, track_pool=False, opt_k = None):
         self.feature_names_ = feature_names
         n,p = X.shape
+        stopping = self.k if opt_k is None else opt_k
+        if verbose: print('Stopping after {0} iterations'.format(stopping))
 
         pool_ = set()
         if track_pool: self.pool = []
-        if track_intermediates: self.intermediates = np.empty(shape=(self.
+        if self.optimize_k or self.track_intermediates: self.intermediates = np.empty(shape=(self.k, 5), dtype=object)
+
        res = y
        i = 0
        IC = np.infty
-
-        while i < self.d and cont:
+        while i < stopping:
            self.intercept_ = np.mean(res).squeeze()
            if verbose: print('.', end='')
 
            p, sis_i = self.sis(X=X, res=res, pool=list(pool_), verbose=verbose)
+           pool_old = deepcopy(pool_)
            pool_.update(sis_i)
            pool_lst = list(pool_)
-
            if track_pool: self.pool = pool_lst
-
+           if str(self.so) == 'EffAdaLASSO(gamma=1)':
+               beta_i = self.so(X=X, y=y, d=i+1, idx_old = list(pool_old), idx_new=sis_i, verbose=verbose)
+           else:
+               beta_i = self.so(X=X[:, pool_lst], y=y, d=i+1, verbose=verbose)
 
            beta = np.zeros(shape=(X.shape[1]))
            beta[pool_lst] = beta_i
 
-           if track_intermediates:
+           if self.optimize_k or self.track_intermediates:
                idx = np.nonzero(beta)[0]
                if self.normalize:
                    coef = (beta[idx].reshape(1, -1)*self.b_y/self.b_x[idx].reshape(1, -1))
@@ -474,7 +718,7 @@ class ICL:
                    coef = beta[idx]
                    intercept_ = self.intercept_
                coef = coef[0]
-               expr = ''.join([('+' if float(c) >= 0 else '') + str(np.round(float(c), 3)) + self.feature_names_[idx][q] for q, c in enumerate(coef)])
+               expr = ''.join([('+' if float(c) >= 0 else '') + str(np.round(float(c), 3)) + str(self.feature_names_[idx][q]) for q, c in enumerate(coef)])
                if verbose: print('Model after {0} iterations: {1}'.format(i, expr))
 
                self.intermediates[i, 0] = deepcopy(idx)
@@ -491,14 +735,9 @@ class ICL:
                pool_ = set(pool_lst)
 
            res = (y.reshape(1, -1) - (np.dot(X, beta).reshape(1, -1)+self.intercept_) ).T
-           if not(self.information_criteria is None):
-               IC_old = IC
-               IC = IC_DICT[self.information_criteria](res=res, k=i+1)
-               if verbose: print('{0}={1}'.format(self.information_criteria, IC))
-               cont = IC < IC_old
 
            i += 1
-        if track_intermediates: self.intermediates = self.intermediates[:, :i]
+        if self.optimize_k or self.track_intermediates: self.intermediates = self.intermediates[:, :i]
 
        if verbose: print()
 
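With the information-criterion stopping rule removed, `fitting` now always runs exactly `stopping` iterations of the screen-pool-fit-residual cycle. A compressed sketch of that loop (not the package code: `sparse_fit` stands in for the `so` operator and the correlation screen is simplified):

```python
import numpy as np

def icl_sketch(X, y, sparse_fit, k, n_sis):
    pool = []                                      # feature indices screened in so far
    res = y.copy()
    beta = None
    for i in range(k):
        corr = np.abs(X.T @ res)                   # simplified SIS-style screen
        if pool:
            corr[pool] = -np.inf                   # never re-select pooled columns
        pool += list(np.argsort(corr)[-n_sis:])    # add the n_sis best-screened features
        beta = sparse_fit(X[:, pool], y, d=i + 1)  # (i+1)-sparse fit on the pool
        res = y - X[:, pool] @ beta                # residual drives the next screen
    return pool, beta
```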
@@ -511,7 +750,7 @@ class ICL:
 
         return self
 
-    def fit(self, X, y, feature_names=None, timer=False, verbose=False, track_pool=False,
+    def fit(self, X, y, val_size=0.1, feature_names=None, timer=False, verbose=False, track_pool=False, random_state=None):
         if verbose: print('removing invalid features')
         self.bad_col = self.filter_invalid_cols(X)
         X_ = np.delete(X, self.bad_col, axis=1)
@@ -522,9 +761,27 @@ class ICL:
         self.solve_norm_coef(X_, y)
         X_, y_ = self.normalize_Xy(X_, y)
 
-        if verbose: print('Fitting
+        if verbose: print('Fitting ICL model')
         if timer: start=time()
-        self.
+        if self.optimize_k == False:
+            self.fitting(X=X_, y=y_, feature_names=feature_names_, verbose=verbose, track_pool = track_pool)
+        else:
+            if verbose: print('Finding optimal model size')
+            X_train, X_val, y_train, y_val = train_test_split(X_, y_, test_size=val_size, random_state=random_state)
+            self.fitting(X=X_train, y=y_train, feature_names=feature_names_, verbose=verbose, track_pool = track_pool)
+            best_k, best_e2 = 0, np.infty
+            for k in range(self.k):
+                idx = self.intermediates[k, 0]
+                coef = self.intermediates[k, 1]
+                inter = self.intermediates[k, 2]
+                X_pred = np.delete(X_val, self.bad_col, axis=1)
+                y_hat = (np.dot(X_pred[:, idx], coef.squeeze()) + inter).reshape(-1, 1)
+                e2_val = rmse(y_hat, y_val)
+                if e2_val < best_e2:
+                    best_k, best_e2 = k+1, e2_val
+            if verbose: print('refitting with k={0}'.format(best_k))
+            self.fitting(X=X_, y=y_, feature_names=feature_names_, verbose=verbose, track_pool = track_pool, opt_k = best_k)
+
         if timer: self.fit_time=time()-start
         if timer and verbose: print(self.fit_time)
 
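The new `optimize_k` path holds out `val_size` of the data, fits once on the training split, scores each of the `k` intermediate models on the validation split, and refits on everything with the best iteration count. Hypothetical usage (argument values are illustrative, and `X_train`, `y_train`, `names` are assumed to exist):

```python
icl = ICL(s=10, so=AdaptiveLASSO(gamma=1), k=8, optimize_k=True, track_intermediates=True)
icl.fit(X=X_train, y=y_train, val_size=0.1, feature_names=names, random_state=0)
print(icl)   # e.g. ICL(n_sis=10, SO=AdaLASSO(gamma=1), k=8)
```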
@@ -544,9 +801,40 @@ class ICL:
         X_ = np.delete(X, self.bad_col, axis=1)
         return (np.dot(X_[:, self.beta_idx_], self.coef_.squeeze()) + self.intercept_).reshape(-1, 1)
 
+    def predict_ensemble(self, X):
+        y_hat = np.zeros(shape=(X.shape[0], self.k))
+        for k in range(self.k):
+            idx = self.intermediates[k, 0]
+            coef = self.intermediates[k, 1]
+            inter = self.intermediates[k, 2]
+            X_pred = np.delete(X, self.bad_col, axis=1)
+            y_hat[:, k]=(np.dot(X_pred[:, idx], coef) + inter).reshape(-1, 1).squeeze()
+        return y_hat
+
+    def repr_ensemble(self, prec=3):
+        ret = []
+        for k in range(self.k):
+            idx = self.intermediates[k, 0]
+            coef = self.intermediates[k, 1]
+            inter = self.intermediates[k, 2]
+            feat = self.intermediates[k, 3]
+            model_k = []
+            for i, name in enumerate(feat):
+                model_k += [('+' if coef[i] > 0 else '') +
+                            str(np.format_float_scientific(coef[i], precision=prec, unique=False))
+                            + ' (' + str(name) + ')' + '\n']
+            model_k += [('+' if inter > 0 else '') + str(float(np.round(inter, prec)))]
+            model_k = ''.join(model_k)
+            ret += [model_k]
+        return ';\n\n'.join(ret)
+
     def score(self, X, y, scorer=rmse):
         return scorer(self.predict(X), y)
 
+    def score_ensemble(self, X, y):
+        y_hat_ens = self.predict_ensemble(X)
+        return np.mean((y_hat_ens - y.reshape(-1,1))**2, axis=0)
+
 class BOOTSTRAP:
     def __init__(self, X, y=None, random_state=None):
         self.X = X
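Because `track_intermediates=True` keeps one model per iteration, the new `predict_ensemble`/`score_ensemble` methods expose the whole path at once. A usage sketch (variable names are illustrative and assumed to exist):

```python
icl = ICL(s=10, so=AdaptiveLASSO(gamma=1), k=5, track_intermediates=True)
icl.fit(X=X_train, y=y_train, feature_names=names)
per_k_mse = icl.score_ensemble(X_test, y_test)   # one test MSE per model size k
print(per_k_mse.argmin() + 1)                    # best-performing k on this data
print(icl.repr_ensemble())                       # all k models, pretty-printed
```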
@@ -630,20 +918,12 @@ class FeatureExpansion:
         self.ops = ops
         self.rung = rung
         self.printrate = printrate
-
-
-
-
-
-
-        spnames, names, X_ = self.FE_aux(X=X, feature_names=feature_names, rung=self.rung, max_rung=self.rung, prev_start = -1, verbose=verbose)
-        if verbose: print('Created {0} features, now removing duplicate features'.format(X_.shape[1]))
-        spnames, names, X_ = self.remove_redundant_features(spnames, names, X_)
-        if f:
-            pass
-        return spnames, names, X_
-
-    def remove_redundant_features(self, spnames, names, X_):
+        self.prev_print = 0
+        for i, op in enumerate(self.ops):
+            if type(op) == str:
+                self.ops[i] = (op, range(rung))
+
+    def remove_redundant_features(self, symbols, names, X):
         sorted_idxs = np.argsort(names)
         for i, idx in enumerate(sorted_idxs):
             if i == 0:
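The reworked constructor normalizes string entries in `ops` into `(op, rung_range)` pairs, so a bare `'sin'` means "apply at every rung" while an explicit tuple restricts the op to the listed rungs. For example (a sketch; `printrate` is assumed to keep its default):

```python
FE = FeatureExpansion(rung=2, ops=['sin', ('mul', range(1))])
print(FE.ops)   # [('sin', range(0, 2)), ('mul', range(0, 1))]
```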
@@ -651,28 +931,83 @@ class FeatureExpansion:
             elif names[idx] != names[sorted_idxs[i-1]]:
                 unique += [idx]
         unique_original_order = np.sort(unique)
-
-
-
-
-        p = X.shape
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+        return symbols[unique_original_order], names[unique_original_order], X[:, unique_original_order]
+
+    def expand(self, X, names=None, verbose=False, f=None, check_pos=False):
+        n, p = X.shape
+        if (names is None) or (len(names) != p):
+            names = ['x_{0}'.format(i) for i in range(X.shape[1])]
+
+        if check_pos == False:
+            symbols = sp.symbols(' '.join(name.replace(' ', '.') for name in names))
+        else:
+            symbols = []
+            for i, name in enumerate(names):
+                name = name.replace(' ', '.')
+                if np.all(X[:, i] > 0):
+                    sym = sp.symbols(name, real=True, positive=True)
+                else:
+                    sym = sp.symbols(name, real=True)
+                symbols.append(sym)
+
+        symbols = np.array(symbols)
+        names = np.array(names)
+
+        if verbose: print('Estimating the creation of around {0} features'.format(self.estimate_workload(p=p, max_rung=self.rung, verbose=verbose>2)))
+
+        names, symbols, X = self.expand_aux(X=X, names=names, symbols=symbols, crung=0, prev_p=0, verbose=verbose)
+        if not(f is None):
+            import pandas as pd
+            df = pd.DataFrame(data=X, columns=names)
+            df['y'] = y
+            df.to_csv(f)
+
+        return names, symbols, X
+
+    def estimate_workload(self, p, max_rung,verbose=False):
+        p0 = 0
+        p1 = p
+        for rung in range(max_rung):
+            if verbose: print('Applying rung {0} expansion'.format(rung))
+            new_u, new_bc, new_bn = 0, 0, 0
+            for (op, rung_range) in self.ops:
+                if rung in rung_range:
+                    if verbose: print('Applying {0} to {1} features will result in approximately '.format(op, p1-p0))
+                    if OP_DICT[op]['inputs'] == 1:
+                        new_u += p1
+                        if verbose: print('{0} new features'.format(p1))
+                    elif OP_DICT[op]['commutative'] == True:
+                        new_bc += (1/2)*(p1 - p0 + 1)*(p0 + p1 + 2)
+                        if verbose: print('{0} new features'.format((1/2)*(p1 - p0 + 1)*(p0 + p1 + 2)))
+                    else:
+                        new_bn += (p1 - p0 + 1)*(p0 + p1 + 2)
+                        if verbose: print('{0} new features'.format((p1 - p0 + 1)*(p0 + p1 + 2)))
+            p0 = p1
+            p1 = p1 + new_u + new_bc + new_bn
+            if verbose: print('For a total of {0} features by rung {1}'.format(p1, rung))
+        return p1
+
+    def add_new(self, new_names, new_symbols, new_X, new_name, new_symbol, new_X_i, verbose=False):
+        valid = (np.isnan(new_X_i).sum(axis=0) + np.isposinf(new_X_i).sum(axis=0) + np.isneginf(new_X_i).sum(axis=0)) == 0
+        if new_names is None:
+            new_names = np.array(new_name[valid])
+            new_symbols = np.array(new_symbol[valid])
+            new_X = np.array(new_X_i[:, valid])
+        else:
+            new_names = np.concatenate((new_names, new_name[valid]))
+            new_symbols = np.concatenate((new_symbols, new_symbol[valid]))
+            new_X = np.hstack([new_X, new_X_i[:, valid]])
+        # if (verbose > 1) and not(new_names is None) and (len(new_names) % self.printrate == 0): print('Created {0} features so far'.format(len(new_names)))
+        if (verbose > 1) and not(new_names is None) and (len(new_names) - self.prev_print >= self.printrate):
+            self.prev_print = len(new_names)
+            elapsed = np.round(time() - self.start_time, 2)
+            print('Created {0} features so far in {1} seconds'.format(len(new_names),elapsed))
+        return new_names, new_symbols, new_X
+
+    def expand_aux(self, X, names, symbols, crung, prev_p, verbose=False):
+
+        str_vectorize = np.vectorize(str)
 
         def simplify_nested_powers(expr):
             # Replace (x**n)**(1/n) with x
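`estimate_workload` grows the feature count with a simple recurrence: each unary op contributes one new feature per existing feature, and a commutative binary op pairing the newest features against all features contributes roughly `(1/2)*(p1 - p0 + 1)*(p0 + p1 + 2)`. Traced by hand for the first rung (illustrative numbers, not package output):

```python
p0, p1 = 0, 10                               # no old features, 10 inputs
new_u = p1                                   # unary op: 10 new features
new_bc = (1/2)*(p1 - p0 + 1)*(p0 + p1 + 2)   # commutative binary op: 66.0
print(p1 + new_u + new_bc)                   # 86.0 features entering the next rung
```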
@@ -693,82 +1028,76 @@ class FeatureExpansion:
                 flatten_pow_chain
             )
 
-
-
-
-
-
-            return
-            np.array(
-            [str(sp.simplify(simplify_nested_powers(name)))for name in feature_names]),
-            X)
+        if crung == 0:
+            self.start_time = time()
+        symbols, names, X = self.remove_redundant_features(X=X, names=names, symbols=symbols)
+        if crung==self.rung:
+            if verbose: print('Completed {0} rounds of feature transformations'.format(self.rung))
+            return symbols, names, X
         else:
-            if verbose: print('
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            if verbose: print('Applying round {0} of feature transformations'.format(crung+1))
+            # if verbose: print('Estimating the creation of {0} features this iteration'.format(self.estimate_workload(p=X.shape[1], max_rung=1)))
+
+            new_names, new_symbols, new_X = None, None, None
+
+            for (op_key, rung_range) in self.ops:
+                if crung in rung_range:
+                    if verbose>1: print('Applying operator {0} to {1} features'.format(op_key, X.shape[1]))
+                    op_params = OP_DICT[op_key]
+                    op_sym, op_np, inputs, comm = op_params['op'], op_params['op_np'], op_params['inputs'], op_params['commutative']
+                    if inputs == 1:
+                        sym_vect = np.vectorize(op_sym)
+                        new_op_symbols = sym_vect(symbols[prev_p:])
+                        new_op_X = op_np(X[:, prev_p:])
+                        new_op_names = str_vectorize(new_op_symbols)
+                        new_names, new_symbols, new_X = self.add_new(new_names=new_names, new_symbols=new_symbols, new_X=new_X,
+                                                                     new_name=new_op_names, new_symbol=new_op_symbols, new_X_i=new_op_X, verbose=verbose)
+                    elif inputs == 2:
+                        for idx1 in range(prev_p, X.shape[1]):
+                            sym_vect = np.vectorize(lambda idx2: op_sym(symbols[idx1], symbols[idx2]))
+                            idx2 = range(idx1 if comm else X.shape[1])
+                            if len(idx2) > 0:
+                                new_op_symbols = sym_vect(idx2)
+                                new_op_names = str_vectorize(new_op_symbols)
+                                X_i = X[:, idx1]
+                                new_op_X = X_i[:, np.newaxis]*X[:, idx2]
+                                new_names, new_symbols, new_X = self.add_new(new_names=new_names, new_symbols=new_symbols, new_X=new_X,
+                                                                             new_name=new_op_names, new_symbol=new_op_symbols, new_X_i=new_op_X, verbose=verbose)
+            if not(new_names is None):
+                names = np.concatenate((names, new_names))
+                symbols = np.concatenate((symbols, new_symbols))
+                prev_p = X.shape[1]
+                X = np.hstack([X, new_X])
             else:
-
-
+                prev_p = X.shape[1]
+
+            if verbose: print('After applying rounds {0} of feature transformations there are {1} features'.format(crung+1, X.shape[1]))
+            if verbose: print('Removing redundant features leaves... ', end='')
+            symbols, names, X = self.remove_redundant_features(X=X, names=names, symbols=symbols)
+            if verbose: print('{0} features'.format(X.shape[1]))
+
+            return self.expand_aux(X=X, names=names, symbols=symbols, crung=crung+1, prev_p=prev_p, verbose=verbose)
+
 if __name__ == "__main__":
+    from sklearn.model_selection import train_test_split
     random_state = 0
-    n = 100
-    p = 10
-    rung = 3
-    s = 5
-    d = 4
-
     np.random.seed(random_state)
-
-
-    y =
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    y_hat_train = icl.predict(X_train_transformed)
-
-    print("Train rmse: " + str(rmse(y_hat_train, y_train)))
-
-    X_test = np.random.normal(size=(100*n, p))
-    X_test_transformed = FE.transform(X_test)
-    y_test = y(X_test)
-    y_hat_test = icl.predict(X_test_transformed)
-    print("Test rmse: " + str(rmse(y_hat_test, y_test)))
-    print("k={0}".format(len(icl.coef_[0])))
-
-    # print(icl.intermediates)
+    n, p = 10000, 10
+    X = np.random.random(size=(n,p))
+    y = np.sqrt(X[:, 0]) - np.cbrt(X[:, 0]) + X[:, 0]**3 - np.log(X[:, 0]) + np.sin(X[:, 0]) + 1
+    names = ['X_{0}'.format(i) for i in range(p)]
+
+    rung = 1
+    small = ['sin', 'cos', 'log', 'abs', 'sqrt', 'cbrt', 'sq', 'cb', 'inv']
+    big = ['six_pow', 'exp', 'add', 'mul', 'div', 'abs_diff']
+    small = [(op, range(rung)) for op in small]
+    big = [(op, range(1)) for op in big]
+    ops = small+big
+
+    FE = FeatureExpansion(rung=rung, ops=ops)
+    Phi_names, Phi_symbols, Phi_ = FE.expand(X=X, names=names, check_pos=True, verbose=True)
+    X_train, X_test, y_train, y_test = train_test_split(Phi_, y, test_size=0.2, random_state=random_state)
+    for i, s in enumerate([5]):
+        icl = ICL(s=s, so=AdaptiveLASSO(gamma=1), k=5, fit_intercept=True, normalize=True, optimize_k=False, track_intermediates=True)
+        icl.fit(X=X_train, y=y_train, feature_names = Phi_names, verbose=False)
+        print(icl.repr_ensemble())
{icol-0.1.6.dist-info → icol-0.7.4.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: icol
-Version: 0.1.6
+Version: 0.7.4
 Summary: Iterative Correlation Learning implementation
 Author-email: Simon Teshuva <simon.teshuva@gmail.com>
 License: MIT
@@ -13,7 +13,7 @@ Requires-Dist: scikit-learn>=1.2.2
 # icol
 ** Iterative Correlation Learning in Python **
 
-`icol` allows one to fit extremly sparse linear models from very high dimensional datasets in a computationally efficient manner.
+`icol` allows one to fit extremly sparse linear models from very high dimensional datasets in a computationally efficient manner. We also include two feature expansion methods, allowing icol to be used as a Symbolic Regression tool.
 
 ---
 
icol-0.7.4.dist-info/RECORD
ADDED

@@ -0,0 +1,7 @@
+icol/__init__.py,sha256=nnhJPjnFCpho8OB-5q-Mq8J91EeCV_o3KVO-lLC8tQY,173
+icol/icol.py,sha256=59HIf4VKznrTKMVI46iz6eRXGLvvSfbGS1lQoLlJT1c,42179
+icol-0.7.4.dist-info/LICENSE,sha256=aD00NFSvGfojy-IWFmtKpeSg262O0dWzmsfXAaT0xuk,1070
+icol-0.7.4.dist-info/METADATA,sha256=ZE20mOaTldgxJtiMOHyVOsh23VjDIKk8r_Tmo8JHwGM,1977
+icol-0.7.4.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
+icol-0.7.4.dist-info/top_level.txt,sha256=OKisIKQUWtt2x-hxR53qbTr2AR3kdeRfTChIdmn2sDY,5
+icol-0.7.4.dist-info/RECORD,,
icol-0.1.6.dist-info/RECORD
DELETED

@@ -1,7 +0,0 @@
-icol/__init__.py,sha256=nnhJPjnFCpho8OB-5q-Mq8J91EeCV_o3KVO-lLC8tQY,173
-icol/icol.py,sha256=Yh3xf64Z4vjo0aFiHUgTxAhVylNtZbyWHe3_4b6fnN8,28387
-icol-0.1.6.dist-info/LICENSE,sha256=aD00NFSvGfojy-IWFmtKpeSg262O0dWzmsfXAaT0xuk,1070
-icol-0.1.6.dist-info/METADATA,sha256=CexfevglpUbzgZUrINQ5GW38fj1YJsh2_GPwFO00SNs,1960
-icol-0.1.6.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
-icol-0.1.6.dist-info/top_level.txt,sha256=OKisIKQUWtt2x-hxR53qbTr2AR3kdeRfTChIdmn2sDY,5
-icol-0.1.6.dist-info/RECORD,,
{icol-0.1.6.dist-info → icol-0.7.4.dist-info}/LICENSE
File without changes

{icol-0.1.6.dist-info → icol-0.7.4.dist-info}/WHEEL
File without changes

{icol-0.1.6.dist-info → icol-0.7.4.dist-info}/top_level.txt
File without changes