icol 0.8.5__py3-none-any.whl → 0.9.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,321 @@
1
+ import warnings
2
+
3
+ from time import time
4
+ from copy import deepcopy
5
+ from itertools import combinations, permutations
6
+
7
+ import numpy as np
8
+ import sympy as sp
9
+
10
+ from sklearn.preprocessing import PolynomialFeatures
11
+ from sklearn.base import clone
12
+
13
+
14
def _unary_op(op_sym, op_np):
    """Spec for a one-argument operator (sympy builder + numpy evaluator)."""
    return {'op': op_sym, 'op_np': op_np, 'inputs': 1, 'commutative': True, 'cares_units': False}


def _binary_op(op_sym, op_np, commutative):
    """Spec for a two-argument operator; `commutative` controls pairing order."""
    return {'op': op_sym, 'op_np': op_np, 'inputs': 2, 'commutative': commutative, 'cares_units': False}


# Registry of operators available to the feature-expansion machinery.
# Each entry pairs a symbolic (sympy) builder with its numeric (numpy)
# counterpart, plus the arity and commutativity metadata consumed by
# FeatureExpansion.
OP_DICT = {
    'sin': _unary_op(sp.sin, np.sin),
    'cos': _unary_op(sp.cos, np.cos),
    'log': _unary_op(sp.log, np.log),
    'exp': _unary_op(sp.exp, np.exp),
    'abs': _unary_op(sp.Abs, np.abs),
    'sqrt': _unary_op(sp.sqrt, np.sqrt),
    'cbrt': _unary_op(lambda x: sp.Pow(x, sp.Rational(1, 3)),
                      lambda x: np.power(x, 1/3)),
    'sq': _unary_op(lambda x: sp.Pow(x, 2),
                    lambda x: np.power(x, 2)),
    'cb': _unary_op(lambda x: sp.Pow(x, 3),
                    lambda x: np.power(x, 3)),
    'six_pow': _unary_op(lambda x: sp.Pow(x, 6),
                         lambda x: np.power(x, 6)),
    'inv': _unary_op(lambda x: 1/x,
                     lambda x: 1/x),
    'mul': _binary_op(sp.Mul, np.multiply, commutative=True),
    'div': _binary_op(lambda x, y: sp.Mul(x, 1/y),
                      lambda x, y: np.multiply(x, 1/y),
                      commutative=False),
    'add': _binary_op(sp.Add,
                      lambda x, y: x + y,
                      commutative=True),
    'sub': _binary_op(lambda x, y: sp.Add(x, -y),
                      lambda x, y: x - y,
                      commutative=False),
    # |x - y| is symmetric, hence commutative.
    'abs_diff': _binary_op(lambda x, y: sp.Abs(sp.Add(x, -y)),
                           lambda x, y: np.abs(x - y),
                           commutative=True),
}
128
+
129
+
130
class PolynomialFeaturesICL:
    """Thin wrapper around sklearn's PolynomialFeatures.

    Adapts the sklearn transformer to the `rung` vocabulary used by the
    rest of this module (rung == polynomial degree) while exposing the
    familiar fit/transform interface.
    """

    def __init__(self, rung, include_bias=False):
        # rung maps directly onto the polynomial degree.
        self.rung = rung
        self.include_bias = include_bias
        self.PolynomialFeatures = PolynomialFeatures(degree=self.rung, include_bias=self.include_bias)

    def __str__(self):
        return 'PolynomialFeatures(degree={0}, include_bias={1})'.format(self.rung, self.include_bias)

    def __repr__(self):
        return self.__str__()

    def fit(self, X, y=None):
        """Fit the wrapped transformer; returns self for chaining."""
        self.PolynomialFeatures.fit(X, y)
        return self

    def transform(self, X):
        """Return the polynomial-expanded feature matrix for X."""
        return self.PolynomialFeatures.transform(X)

    def fit_transform(self, X, y=None):
        """Fit on X then return the transformed matrix in one call."""
        return self.PolynomialFeatures.fit_transform(X, y)

    def get_feature_names_out(self, input_features=None):
        """Return output feature names.

        `input_features` (optional list of input column names) is passed
        through to sklearn so callers can label outputs with their own
        column names; previously the wrapper dropped this parameter and
        always used sklearn's auto-generated names.  Default None keeps
        the old behavior.
        """
        return self.PolynomialFeatures.get_feature_names_out(input_features)
154
+
155
+
156
class FeatureExpansion:
    """Recursive symbolic feature expansion.

    Starting from a base feature matrix, repeatedly applies the
    symbolic/numeric operator pairs registered in OP_DICT for `rung`
    rounds, building new columns alongside matching sympy expressions.
    After every round, columns whose symbolic string duplicates an
    earlier column's are dropped, as are columns containing NaN/inf.
    """

    def __init__(self, ops, rung, printrate=1000):
        """
        Parameters
        ----------
        ops : list
            Each entry is either an OP_DICT key (applied at every rung)
            or a tuple ``(key, rungs)`` restricting which rungs it
            applies at.
        rung : int
            Number of expansion rounds.
        printrate : int
            Progress is reported roughly every `printrate` created
            features (only when verbose > 1).
        """
        # Copy the list: normalization below must not mutate the
        # caller's `ops` argument (the previous version rewrote the
        # passed-in list in place).
        self.ops = list(ops)
        self.rung = rung
        self.printrate = printrate
        self.prev_print = 0
        for i, op in enumerate(self.ops):
            if isinstance(op, str):
                self.ops[i] = (op, range(rung))

    def remove_redundant_features(self, symbols, names, X):
        """Drop columns whose string name duplicates another column's.

        Keeps one representative per distinct name and preserves the
        surviving columns' original order.  Returns the filtered
        (symbols, names, X).
        """
        if len(names) == 0:
            # Nothing to deduplicate (also avoids referencing an unset
            # accumulator below, which previously raised NameError).
            return symbols, names, X
        sorted_idxs = np.argsort(names)
        # Walk names in sorted order; an entry differing from its sorted
        # predecessor starts a new group and is kept as representative.
        unique = [sorted_idxs[0]]
        for i in range(1, len(sorted_idxs)):
            idx = sorted_idxs[i]
            if names[idx] != names[sorted_idxs[i - 1]]:
                unique.append(idx)
        unique_original_order = np.sort(unique)
        return symbols[unique_original_order], names[unique_original_order], X[:, unique_original_order]

    def expand(self, X, y=None, names=None, verbose=False, f=None, check_pos=False):
        """Expand X through `self.rung` rounds of transformations.

        Parameters
        ----------
        X : ndarray of shape (n, p)
        y : optional target; only used when dumping to CSV via `f`.
        names : optional list of p column names; auto-generated when
            missing or of the wrong length.
        verbose : bool/int verbosity level.
        f : optional path; when given, the expanded matrix plus y is
            written there as CSV (imports pandas lazily).
        check_pos : when True, strictly-positive columns get
            positive=True sympy symbols so expressions such as
            sqrt(x**2) simplify further.

        Returns
        -------
        (names, symbols, X_expanded)
        """
        n, p = X.shape
        if (names is None) or (len(names) != p):
            names = ['x_{0}'.format(i) for i in range(X.shape[1])]

        if not check_pos:
            # One symbol per column; sympy names cannot contain spaces.
            # Creating symbols one at a time also behaves correctly for
            # p == 1, where sp.symbols on a joined string returns a bare
            # Symbol rather than a sequence.
            symbols = [sp.symbols(name.replace(' ', '.')) for name in names]
        else:
            symbols = []
            for i, name in enumerate(names):
                name = name.replace(' ', '.')
                if np.all(X[:, i] > 0):
                    sym = sp.symbols(name, real=True, positive=True)
                else:
                    sym = sp.symbols(name, real=True)
                symbols.append(sym)

        symbols = np.array(symbols)
        names = np.array(names)

        if verbose:
            print('Estimating the creation of around {0} features'.format(
                self.estimate_workload(p=p, max_rung=self.rung, verbose=verbose > 2)))

        # BUG FIX: expand_aux returns (symbols, names, X); the previous
        # version unpacked it as (names, symbols, X), so callers received
        # the name and symbol arrays under swapped labels.
        symbols, names, X = self.expand_aux(X=X, names=names, symbols=symbols, crung=0, prev_p=0, verbose=verbose)

        if f is not None:
            import pandas as pd
            df = pd.DataFrame(data=X, columns=names)
            df['y'] = y
            df.to_csv(f)

        return names, symbols, X

    def estimate_workload(self, p, max_rung, verbose=False):
        """Roughly estimate how many features the expansion will create.

        Unary ops contribute one feature per current feature; binary ops
        contribute a count quadratic in the column totals (halved when
        commutative).  This is an upper-bound style estimate only:
        deduplication and NaN/inf filtering are not accounted for.
        """
        p0 = 0  # feature count before the previous rung
        p1 = p  # feature count after the previous rung
        for rung in range(max_rung):
            if verbose: print('Applying rung {0} expansion'.format(rung))
            new_u, new_bc, new_bn = 0, 0, 0
            for (op, rung_range) in self.ops:
                if rung in rung_range:
                    if verbose: print('Applying {0} to {1} features will result in approximately '.format(op, p1-p0))
                    if OP_DICT[op]['inputs'] == 1:
                        new_u += p1
                        if verbose: print('{0} new features'.format(p1))
                    elif OP_DICT[op]['commutative'] == True:
                        new_bc += (1/2)*(p1 - p0 + 1)*(p0 + p1 + 2)
                        if verbose: print('{0} new features'.format((1/2)*(p1 - p0 + 1)*(p0 + p1 + 2)))
                    else:
                        new_bn += (p1 - p0 + 1)*(p0 + p1 + 2)
                        if verbose: print('{0} new features'.format((p1 - p0 + 1)*(p0 + p1 + 2)))
            p0 = p1
            p1 = p1 + new_u + new_bc + new_bn
            if verbose: print('For a total of {0} features by rung {1}'.format(p1, rung))
        return p1

    def add_new(self, new_names, new_symbols, new_X, new_name, new_symbol, new_X_i, verbose=False):
        """Append candidate features, keeping only all-finite columns.

        A candidate column survives only if it contains no NaN, +inf or
        -inf.  The accumulators (new_names/new_symbols/new_X) start as
        None and are created on first use.  Returns the updated triple.
        """
        # Per-column validity mask: True where the column is entirely finite.
        valid = (np.isnan(new_X_i).sum(axis=0) + np.isposinf(new_X_i).sum(axis=0) + np.isneginf(new_X_i).sum(axis=0)) == 0
        if new_names is None:
            new_names = np.array(new_name[valid])
            new_symbols = np.array(new_symbol[valid])
            new_X = np.array(new_X_i[:, valid])
        else:
            new_names = np.concatenate((new_names, new_name[valid]))
            new_symbols = np.concatenate((new_symbols, new_symbol[valid]))
            new_X = np.hstack([new_X, new_X_i[:, valid]])
        # Throttled progress reporting: at most one message per
        # `printrate` newly created features.
        if (verbose > 1) and (new_names is not None) and (len(new_names) - self.prev_print >= self.printrate):
            self.prev_print = len(new_names)
            elapsed = np.round(time() - self.start_time, 2)
            print('Created {0} features so far in {1} seconds'.format(len(new_names),elapsed))
        return new_names, new_symbols, new_X

    def expand_aux(self, X, names, symbols, crung, prev_p, verbose=False):
        """One round of expansion, applied recursively.

        Operators scheduled for rung `crung` are applied to the columns
        created in the previous round (index prev_p onward); surviving
        results are appended, the matrix is deduplicated, and the method
        recurses with crung + 1 until crung == self.rung.

        Returns (symbols, names, X) — note the order.
        """
        str_vectorize = np.vectorize(str)

        if crung == 0:
            self.start_time = time()
            # Deduplicate the caller-supplied base features first.
            symbols, names, X = self.remove_redundant_features(X=X, names=names, symbols=symbols)
        if crung == self.rung:
            if verbose: print('Completed {0} rounds of feature transformations'.format(self.rung))
            return symbols, names, X
        else:
            if verbose: print('Applying round {0} of feature transformations'.format(crung+1))

        new_names, new_symbols, new_X = None, None, None

        for (op_key, rung_range) in self.ops:
            if crung in rung_range:
                if verbose > 1: print('Applying operator {0} to {1} features'.format(op_key, X.shape[1]))
                op_params = OP_DICT[op_key]
                op_sym, op_np, inputs, comm = op_params['op'], op_params['op_np'], op_params['inputs'], op_params['commutative']
                if inputs == 1:
                    # Unary: vectorize over every feature added last round.
                    sym_vect = np.vectorize(op_sym)
                    new_op_symbols = sym_vect(symbols[prev_p:])
                    new_op_X = op_np(X[:, prev_p:])
                    new_op_names = str_vectorize(new_op_symbols)
                    new_names, new_symbols, new_X = self.add_new(new_names=new_names, new_symbols=new_symbols, new_X=new_X,
                                                                 new_name=new_op_names, new_symbol=new_op_symbols, new_X_i=new_op_X, verbose=verbose)
                elif inputs == 2:
                    for idx1 in range(prev_p, X.shape[1]):
                        sym_vect = np.vectorize(lambda idx2: op_sym(symbols[idx1], symbols[idx2]))
                        # Commutative ops pair each new column only with
                        # strictly earlier columns (avoids mirrored
                        # duplicates); non-commutative ops pair with all.
                        idx2 = range(idx1 if comm else X.shape[1])
                        if len(idx2) > 0:
                            new_op_symbols = sym_vect(idx2)
                            new_op_names = str_vectorize(new_op_symbols)
                            X_i = X[:, idx1]
                            new_op_X = op_np(X_i[:, np.newaxis], X[:, idx2])
                            new_names, new_symbols, new_X = self.add_new(new_names=new_names, new_symbols=new_symbols, new_X=new_X,
                                                                         new_name=new_op_names, new_symbol=new_op_symbols, new_X_i=new_op_X, verbose=verbose)
        if new_names is not None:
            names = np.concatenate((names, new_names))
            symbols = np.concatenate((symbols, new_symbols))
            # Everything below prev_p existed before this round.
            prev_p = X.shape[1]
            X = np.hstack([X, new_X])
        else:
            prev_p = X.shape[1]

        if verbose: print('After applying rounds {0} of feature transformations there are {1} features'.format(crung+1, X.shape[1]))
        if verbose: print('Removing redundant features leaves... ', end='')
        # NOTE(review): deduplication may delete columns below prev_p,
        # shifting column indices relative to the prev_p boundary used by
        # the next round — confirm this is the intended enumeration.
        symbols, names, X = self.remove_redundant_features(X=X, names=names, symbols=symbols)
        if verbose: print('{0} features'.format(X.shape[1]))

        return self.expand_aux(X=X, names=names, symbols=symbols, crung=crung+1, prev_p=prev_p, verbose=verbose)
321
+