poseigen-compass 0.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
Copyright 2025 Husam Abdulnabi
|
|
2
|
+
|
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
|
4
|
+
|
|
5
|
+
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
|
6
|
+
|
|
7
|
+
THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: poseigen_compass
|
|
3
|
+
Version: 0.0.2
|
|
4
|
+
Summary: The basics for Poseigen software
|
|
5
|
+
License: MIT
|
|
6
|
+
Author: Husam Abdulnabi
|
|
7
|
+
Author-email: husam.abdulnabi@gmail.com
|
|
8
|
+
Maintainer: Husam Abdulnabi
|
|
9
|
+
Maintainer-email: husam.abdulnabi@gmail.com
|
|
10
|
+
Requires-Python: >=3.12
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: Topic :: Software Development
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Requires-Dist: numpy
|
|
16
|
+
Requires-Dist: pandas
|
|
17
|
+
Requires-Dist: poseigen_seaside
|
|
18
|
+
Project-URL: Homepage, https://github.com/husam94/poseigen_compass
|
|
19
|
+
Description-Content-Type: text/markdown
|
|
20
|
+
|
|
21
|
+
Will do
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
poseigen_compass.py,sha256=kETnA0rYXGPVnyiwr-Peiocxe9Qx7dz62ifkMm4vxic,21395
|
|
2
|
+
poseigen_compass-0.0.2.dist-info/LICENSE.txt,sha256=k0BUnY6wRRyp1AeU0K8rK_DyvcwNc0dH1NJJOJsp1SA,1068
|
|
3
|
+
poseigen_compass-0.0.2.dist-info/METADATA,sha256=BqPoHWuCBVIS1eQaS921cxhq6oOFUTTM2-JGghSdzRM,644
|
|
4
|
+
poseigen_compass-0.0.2.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
|
|
5
|
+
poseigen_compass-0.0.2.dist-info/RECORD,,
|
poseigen_compass.py
ADDED
|
@@ -0,0 +1,604 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
import pandas as pd
|
|
3
|
+
import os
|
|
4
|
+
import copy
|
|
5
|
+
import glob, shutil
|
|
6
|
+
#-----------------------------
|
|
7
|
+
import poseigen_seaside.basics as se
|
|
8
|
+
import poseigen_seaside.metrics as mex
|
|
9
|
+
#-----------------------------
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def VarTra_exp(inp, inverse = False):
    """Logarithmic variable transform.

    Returns ``np.log(inp)`` in the forward direction and ``np.exp(inp)``
    when ``inverse`` is anything that does not compare equal to ``False``.
    """
    # The trick: the caller applies RevNumbers to the result afterwards.
    if inverse == False:
        return np.log(inp)
    return np.exp(inp)
|
|
15
|
+
|
|
16
|
+
def RevNumbers(inp, min = 0, max = 10):
    """Mirror ``inp`` inside the interval ``[min, max]``.

    ``min`` maps to ``max`` and ``max`` maps to ``min``; the result is
    ``max - inp + min``.
    """
    distance_from_top = max - inp
    return distance_from_top + min
|
|
18
|
+
|
|
19
|
+
def RandomCanGen(VarDict, num_can, num_gen = 10, configspace = False):
    """Draw up to ``num_can`` random candidate configurations.

    Parameters
    ----------
    VarDict : dict, or an object with ``sample_configuration``
        When ``configspace`` is ``False``: maps a variable name to
        ``[values, kind]`` or ``[values, kind, transform]``.
        ``kind == 'cat'`` picks uniformly from the sequence ``values``;
        any other kind treats ``values[0]`` / ``values[1]`` as bounds, and
        ``kind == 'int'`` rounds the draw to an integer.  ``transform`` is
        called as ``transform(x)`` and ``transform(x, inverse=True)``;
        sampling is uniform in the transformed space.
        Otherwise: ``VarDict.sample_configuration(num_can)`` is used.
    num_can : int
        Number of candidates wanted.
    num_gen : int
        Oversampling factor: ``num_can * num_gen`` raw draws are made before
        duplicates are dropped.
    configspace : bool
        ``False`` selects the plain-dict sampler (plain uniform sampling,
        no Latin hypercube).

    Returns
    -------
    dict
        ``{index: {variable: value}}``.  In dict mode fewer than ``num_can``
        entries may come back if too few unique draws exist.
    """
    NewCanDict = {}

    if configspace is not False:
        # NOTE(review): assumes sample_configuration returns an indexable
        # sequence even for num_can == 1 - confirm for the ConfigSpace
        # version in use.
        configs = VarDict.sample_configuration(int(num_can))
        for i in range(num_can):
            NewCanDict[i] = dict(configs[i])
        return NewCanDict

    # Oversample so that enough unique candidates survive de-duplication.
    num_cangen = num_can * num_gen
    GenCanDict = {i: {} for i in range(num_cangen)}

    # One column of uniform [0, 1) draws per variable.
    sample = np.random.uniform(size = (num_cangen, len(VarDict)))

    for ik, key in enumerate(VarDict):
        spec = VarDict[key]

        if spec[1] == 'cat':
            picks = np.random.choice(range(len(spec[0])), size = num_cangen)
            r = [spec[0][u] for u in picks]

        else:
            n, m = spec[0][0], spec[0][1]
            transform = spec[2] if len(spec) > 2 else None

            if transform is not None:
                n, m = (transform(q) for q in (n, m))

            g = sample[:, ik] * (m - n) + n

            if transform is not None:
                # Keep an ndarray here: a plain list breaks the arithmetic in
                # RevNumbers and has no .tolist() further down.
                g = np.array([transform(jj, inverse = True) for jj in g])

                if transform == VarTra_exp:
                    # Mirror the log-scale draws inside the original bounds.
                    g = RevNumbers(g, spec[0][0], spec[0][1])

            r = np.round(g).astype(int) if spec[1] == 'int' else g
            r = r.tolist()

        for i in range(num_cangen):
            GenCanDict[i][key] = r[i]

    # Keep unique candidates only, then cut back to the requested count.
    # NOTE(review): relies on se.UniqueNestedDict(keepkey=False) renumbering
    # the surviving entries from 0 - confirm against poseigen_seaside.
    NewCanDict = se.UniqueNestedDict(GenCanDict, keepkey = False)
    NewCanDict = {k: v for k, v in NewCanDict.items() if k < num_can}

    return NewCanDict
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
################################################################################
|
|
76
|
+
|
|
77
|
+
def StandardCanScorer(algo, algo_args, data, Splits = None, metrics_mode = None, add_metrics_modes = None,
                      pathname = None, returnmodel = False, Split = None):
    """Fit one candidate model and score its predictions.

    Parameters
    ----------
    algo : callable
        Model factory; ``algo(**algo_args)`` must return an object with
        ``fit`` and ``predict``.
    algo_args : dict
        Keyword arguments for ``algo``.
    data : sequence
        Without a split: ``[fit_arrays, eval_arrays]`` where ``fit_arrays``
        is unpacked into ``fit`` and ``eval_arrays[0]`` is passed to
        ``predict``.  With a split: a sequence of arrays that are each
        indexed by the split indices.
    Splits, Split : sequence of two index collections, optional
        A single ``[fit_idx, eval_idx]`` split.  ``Split`` is the keyword
        ``ModelEvalHelper`` passes; ``Splits`` is kept for backward
        compatibility and is read the same way.  ``Split`` wins if both are
        given.
    metrics_mode : [callable, dict], optional
        Called as ``metric(y_hat, *eval_arrays[1:], **kwargs)``.  Defaults
        to ``[mex.AError, {}]``.
    add_metrics_modes : list, optional
        Flat list ``[f1, ..., fk, kwargs1, ..., kwargsk]`` of extra metrics.
    pathname : str, optional
        If given, the fitted model is pickled there via ``se.PickleDump``.
    returnmodel : bool
        If not ``False``, return ``(score, model)``.

    Returns
    -------
    The metric value, or ``[value, [extra values]]`` when
    ``add_metrics_modes`` is given (optionally paired with the model).

    Raises
    ------
    ValueError
        If ``data`` is a dict and no split is given; this function has no
        way to tell which entry to use.
    """
    if metrics_mode is None:
        metrics_mode = [mex.AError, {}]

    split = Split if Split is not None else Splits

    if split is not None:
        dataz = [[d[split[t]] for d in data] for t in (0, 1)]
    elif isinstance(data, dict):
        raise ValueError('StandardCanScorer cannot choose an entry of dict data; '
                         'pass [fit_arrays, eval_arrays] or provide Split')
    else:
        dataz = data

    m = algo(**algo_args)
    m.fit(*dataz[0])
    y_hat = m.predict(dataz[1][0])
    targets = dataz[1][1:]
    score = metrics_mode[0](y_hat, *targets, **metrics_mode[1])

    if pathname is not None: se.PickleDump(m, pathname)

    if add_metrics_modes is not None:
        # First half holds the functions, second half their kwargs.
        lamm = len(add_metrics_modes) // 2
        add_scores = [add_metrics_modes[amm](y_hat, *targets, **add_metrics_modes[amm + lamm])
                      for amm in range(lamm)]
        # NOTE(review): the [score, extras] pair is assumed to apply only
        # when extra metrics were requested - confirm.
        score = [score, add_scores]

    return score if returnmodel is False else (score, m)
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
################################################################################
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def ModelEvalHelper(c,
                    algo, CanDict, data, Splits = None, repeats = 1,
                    CS_mode = None, CSDict = None,
                    lmd = 1, lsp = 1,
                    statusprints = True, pathname = None, savemodels = False, pn_Can = None):
    """Score one (candidate, split, repeat) combination.

    Data splitting is handled by the scorer: the selected split is simply
    forwarded to it as the ``Split`` keyword.

    Parameters
    ----------
    c : sequence
        ``(i, s, r)`` - candidate key, split index, repeat index.
    algo, CanDict, data
        Model factory, ``{key: algo kwargs}``, and the data handed to the
        scorer (omitted from the call when ``None``).
    Splits : sequence, optional
        ``Splits[s]`` is forwarded as ``Split``.
    repeats, lmd, lsp : int
        Totals used only in the progress messages.
    CS_mode : [callable, dict], optional
        Scorer and its default kwargs.  Defaults to ``StandardCanScorer``
        with a squared ``mex.AError`` metric.
    CSDict : dict, optional
        Per-candidate scorer kwargs.  ``CS_mode[1]`` is used for candidates
        that have no entry.
    statusprints : bool or list
        ``True`` prints all candidate settings; a list prints only those
        keys (keys missing from the candidate are skipped).
    pathname, savemodels
        When both are set the scorer receives
        ``pathname + '<i>_<s>_<r>'`` as its ``pathname``.
    pn_Can
        Unused.

    Returns
    -------
    Whatever the scorer returns.
    """
    if CS_mode is None:
        CS_mode = [StandardCanScorer, {'metrics_mode': [mex.AError, {'expo': 2}]}]

    i, s, r = c

    Split = Splits[s] if Splits is not None else None

    if statusprints:
        if r == 0 and s == 0:
            if statusprints == True:
                itos = CanDict[i].items()
            else:
                # Keys may have been moved out of the candidate
                # (e.g. scorer variables), so skip the ones that are gone.
                itos = [(k, CanDict[i][k]) for k in statusprints if k in CanDict[i]]
            print(f' Model {i+1} of {lmd}: {itos}')
        print(f' cross val {s+1} of {lsp}, Repeat {r+1} of {repeats}')

    savepath = None
    if pathname is not None and savemodels == True:
        savepath = pathname + str(i) + '_' + str(s) + '_' + str(r)

    meh_args = {}
    if data is not None: meh_args['data'] = data
    if Split is not None: meh_args['Split'] = Split

    # Fall back to the scorer's default kwargs when nothing candidate-specific
    # was supplied.
    if CSDict is not None and i in CSDict:
        scorer_kwargs = CSDict[i]
    else:
        scorer_kwargs = CS_mode[1]

    score = CS_mode[0](algo, CanDict[i], **meh_args,
                       pathname = savepath, **scorer_kwargs)

    if statusprints: print(f" Model {i+1}, cross val {s+1}, Repeat {r+1}: {score}")

    return score
|
|
140
|
+
|
|
141
|
+
def CanEvaluator(algo, CanDict, data, Splits = None, repeats = 1, parallel = False,
                 CS_mode = [StandardCanScorer, {'metrics_mode': [mex.AError, {'expo': 2}]}], CS_vars = None,
                 pickup = False, statusprints = True, pathname = None, savemodels = False,
                 ext = None):
    """Evaluate every candidate on every split and repeat.

    Parameters
    ----------
    algo, data
        Forwarded to the scorer through ``ModelEvalHelper``.  ``data`` may be
        ``None`` (e.g. surrogate benchmarks), in which case it is not passed.
    CanDict : dict
        ``{key: candidate settings}``.  Side effect: variables named in
        ``CS_vars`` are removed from each candidate in place.
    Splits : sequence, optional
        One split or a list of splits; a single split (first element not a
        ``list``) is wrapped into a one-element list.
    repeats : int
        Evaluations per candidate and split.
    parallel
        Currently unused; evaluation is sequential.
    CS_mode : [callable, dict]
        Scorer and its base kwargs.
    CS_vars : list, optional
        Candidate variables that belong to the scorer rather than the model;
        they are moved from the candidate into that candidate's scorer kwargs.
    pickup : bool
        Reuse results found in the metrics pickle (needs ``pathname``).
    statusprints, savemodels
        Forwarded to ``ModelEvalHelper``.
    pathname, ext
        Output location, resolved via ``se.NewFolder``; ``ext=True`` means
        ``'CanEval'``.

    Returns
    -------
    dict
        ``metrics[candidate][split][repeat]`` -> score.
    """
    if ext is True: ext = 'CanEval'
    newpathname = se.NewFolder(pathname, ext = ext)

    if pathname is None: pickup = False
    else: pn_met = newpathname + 'Mets' + '.p'

    CDK = CanDict.keys()
    lmd = len(CDK)

    CSDict = {}
    for i in CDK:
        # Every candidate needs its own kwargs dict; sharing CS_mode[1] would
        # let one candidate's CS_vars overwrite the others (and leak into the
        # default argument).  A shallow copy is enough because only top-level
        # keys are set below.
        CSDict[i] = dict(CS_mode[1])
        if CS_vars is not None:
            for v in CS_vars:
                CSDict[i][v] = CanDict[i].pop(v)

    # Each split has to be in the format: [array, array]
    lsp = 1
    if Splits is not None:
        if isinstance(Splits[0], list) is False: Splits = [Splits]
        lsp = len(Splits)
    elif isinstance(data, dict): lsp = len(data)

    # Result skeleton plus the flat list of jobs to run.
    modelcombos = []
    newmetrics = {}
    for i in CDK:
        newmetrics[i] = {}
        for s in range(lsp):
            newmetrics[i][s] = {}
            for r in range(repeats):
                newmetrics[i][s][r] = None
                modelcombos.append([i, s, r])

    if pickup and os.path.isfile(pn_met):
        oldmetrics = se.PickleLoad(pn_met)
        for i in oldmetrics.keys():
            if i < lmd:
                for s in oldmetrics[i].keys():
                    for r in oldmetrics[i][s].keys():
                        newmetrics[i][s][r] = oldmetrics[i][s][r]

        print(newmetrics)

    MEH_args = {'algo': algo, 'CanDict': CanDict, 'data': data, 'Splits': Splits, 'repeats': repeats,
                'CS_mode': CS_mode, 'CSDict': CSDict, 'lmd': lmd, 'lsp': lsp,
                'statusprints': statusprints, 'pathname': newpathname, 'savemodels': savemodels}

    # TODO: parallel evaluation is not implemented for the dict-based metrics
    # layout; sequential execution is what makes pickup possible.
    for c in modelcombos:
        i, s, r = c
        if newmetrics[i][s][r] is None:
            newmetrics[i][s][r] = ModelEvalHelper(c, **MEH_args)
            # Checkpoint after every evaluation so a rerun can pick up.
            if pathname is not None: se.PickleDump(newmetrics, pn_met)

    return newmetrics
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
def RandomOpt(algo, VarDict, data, Splits = None,
              budget = 20, repeats = 1,
              CS_mode = [StandardCanScorer, {'metrics_mode': [mex.AError, {'expo': 2}]}], CS_vars = None,
              RMG_args = {}, configspace = False,

              smallest = None,  # placeholder: currently unused

              pickup = False, statusprints = True, pathname = None, savemodels = False, ext = None):
    """Random search: sample ``budget`` candidates and evaluate them.

    Parameters
    ----------
    algo, data, Splits, repeats, CS_mode, CS_vars, savemodels
        Forwarded to ``CanEvaluator``.
    VarDict
        Search space, see ``RandomCanGen``.
    budget : int
        Number of candidates.
    RMG_args : dict
        Extra keyword arguments for ``RandomCanGen``; not modified.
    configspace : bool
        Passed to ``RandomCanGen`` (overrides any ``'configspace'`` entry in
        ``RMG_args``).
    pickup : bool
        Continue from a previous run stored under ``pathname``; the stored
        candidate set is trimmed or extended to ``budget``.
    statusprints
        If truthy (and not in configspace mode) it is replaced by the list of
        variables that can actually vary.
    pathname, ext
        Output location, resolved via ``se.NewFolder``; ``ext=True`` means
        ``'RandomOpt'``.

    Returns
    -------
    list
        ``[CanDict, metrics]`` with a single round ``0`` in each.
    """
    if ext is True: ext = 'RandomOpt'
    newpathname = se.NewFolder(pathname, ext = ext)
    saving = pathname is not None

    # Work on a copy so neither the caller's dict nor the shared default
    # argument is mutated, and so both branches below see `configspace`.
    gen_args = dict(RMG_args)
    gen_args['configspace'] = configspace

    #--------------------------
    if saving:
        # Store the run settings, without the bulky trainer inputs.
        CS_args = copy.deepcopy(CS_mode[1])
        if 'trainer_args' in CS_args:
            for xo in ['inps', 'out', 'out_bind']:
                CS_args['trainer_args'][xo] = None

        RO_args = {'VarDict': VarDict, 'CS_mode': [CS_mode[0], CS_args]}
        se.PickleDump(RO_args, newpathname + 'RO_args')

        pn_OO = newpathname + 'Out' + '.p'
    else:
        pickup = False
        pn_OO = None
    #--------------------------

    if pickup and os.path.isfile(pn_OO):

        CanDict, metrics = se.PickleLoad(pn_OO)

        # Option to shrink or grow the first round to the current budget.
        lcdk = len(CanDict[0].keys())
        difo = budget - lcdk

        if difo < 0:
            for i in list(CanDict[0].keys()):
                if i >= budget: del CanDict[0][i]

        if difo > 0:
            # Does not guarantee unique candidates.
            print('adding more')

            # Oversample by 20x for a better chance of unique candidates.
            moreCanDict = RandomCanGen(VarDict, difo * 20, **gen_args)

            cdo = copy.deepcopy(CanDict)
            cdo[0].update({k + lcdk: v for k, v in moreCanDict.items()})
            # NOTE(review): UniqueNestedDict is applied to the round-level
            # dict here, not to the candidates of round 0 - confirm that this
            # de-duplicates what is intended.
            CanDict = se.UniqueNestedDict(cdo, keepkey = False)
            CanDict = {0: {k: v for k, v in CanDict[0].items() if k < budget}}

        # Only round 0 is used by random search.
        for i in list(CanDict.keys()):
            if i > 0: del CanDict[i]

        se.PickleDump([CanDict, metrics], pn_OO)

    else:
        CanDict, metrics = {}, {}
        CanDict[0] = RandomCanGen(VarDict, budget, **gen_args)
        if saving: se.PickleDump([CanDict, metrics], pn_OO)

    ######################################################

    # Only report variables that can take more than one value.  In
    # configspace mode VarDict is not a dict of [values, kind, ...] entries,
    # so the filter is skipped.
    sps = statusprints
    if statusprints and configspace is False:
        sps = []
        for key, val in VarDict.items():
            if val[1] == 'cat':
                if len(val[0]) > 1: sps.append(key)
            else: sps.append(key)

    ######################################################

    ro = 0

    pn_can = newpathname + str(ro) if saving else None

    metrics[ro] = CanEvaluator(algo, CanDict[ro], data,
                               Splits = Splits, repeats = repeats,
                               CS_vars = CS_vars, CS_mode = CS_mode,
                               pickup = pickup, statusprints = sps,
                               pathname = pn_can, savemodels = savemodels, ext = ext)

    OptOut = [CanDict, metrics]
    if saving: se.PickleDump(OptOut, pn_OO)

    return OptOut
|
|
312
|
+
|
|
313
|
+
|
|
314
|
+
|
|
315
|
+
def Metrics2Flat(metrics):
    """Flatten ``metrics[model][split][repeat]`` into a single array.

    Values are emitted in dictionary order: models outermost, repeats
    innermost.
    """
    flat = [score
            for per_model in metrics.values()
            for per_split in per_model.values()
            for score in per_split.values()]
    return np.array(flat)
|
|
323
|
+
|
|
324
|
+
|
|
325
|
+
|
|
326
|
+
#############################################################################################
|
|
327
|
+
|
|
328
|
+
def TopCan(OptOut, num_top = 10,
           reduce_func = None, smallest = True,
           perround = False, ext = None):
    """Return the positions of the best candidates of an optimisation run.

    Parameters
    ----------
    OptOut : list or str
        ``[CanDict, metrics]`` or a path from which it is loaded
        (``ext=True`` means ``'SurvOpt'``).  ``metrics`` is nested as
        ``metrics[round][candidate][split][repeat]``.
    num_top : int
        Maximum number of candidates to return (per round if ``perround``).
    reduce_func : callable, optional
        Collapses all split/repeat scores of one candidate into one number.
        Defaults to ``np.nanmin`` / ``np.nanmax`` depending on ``smallest``.
    smallest : bool
        Whether lower scores are better.
    perround : bool
        ``False``: rank all rounds together and return an array whose rows
        are ``[round, candidate]``.  Otherwise return a list with one such
        array per round.

    Returns
    -------
    numpy.ndarray or list of numpy.ndarray
        Rounds and candidates are reported by position (0, 1, ...) in the
        metrics dictionaries, best first.  Candidates whose reduced score is
        NaN are ranked last.
    """
    if isinstance(OptOut, str):
        if ext is True: ext = 'SurvOpt'
        newpathname = se.NewFolder(OptOut, ext = ext)
        CanDict, metrics = se.PickleLoad(newpathname + 'Out')
    else:
        CanDict, metrics = OptOut

    if reduce_func is None: reduce_func = np.nanmin if smallest else np.nanmax

    def _best_first(values):
        # argsort places NaN last; negate for "largest first" instead of
        # reversing, which would move the NaNs to the front.
        values = np.asarray(values)
        keyed = values if smallest else -values
        return np.argsort(keyed, kind = 'stable')[:num_top]

    # One reduced score per candidate, grouped by round.
    met_reduced = [np.array([reduce_func([metrics[k1][k2][k3][k4]
                                          for k3 in metrics[k1][k2].keys()
                                          for k4 in metrics[k1][k2][k3].keys()])
                             for k2 in metrics[k1].keys()])
                   for k1 in metrics.keys()]

    if perround is False:
        met_reduced_comb = np.concatenate(met_reduced)
        round_idxs = np.concatenate([np.repeat(im, len(met)) for im, met in enumerate(met_reduced)])
        can_idxs = np.concatenate([np.arange(len(met)) for met in met_reduced])
        idxs_to_use = _best_first(met_reduced_comb)
        top_idxs = np.array([round_idxs[idxs_to_use], can_idxs[idxs_to_use]]).T

    else:
        top_idxs = []
        for imet, met in enumerate(met_reduced):
            order = _best_first(met)
            # A round may hold fewer than num_top candidates, so size the
            # round column from what was actually selected.
            top_idxs.append(np.array([np.repeat(imet, len(order)), order]).T)

    return top_idxs
|
|
374
|
+
|
|
375
|
+
|
|
376
|
+
def TopCanDict(OptOut, topcan, comb_rounds = True, ext = None):
    """Collect the settings of selected candidates into a new dictionary.

    ``OptOut`` is ``[CanDict, metrics]`` or a path it is loaded from
    (``ext=True`` means ``'SurvOpt'``).  ``topcan`` is one array of
    ``[round, candidate]`` rows or a list of such arrays.

    With ``comb_rounds`` the result is flat and keyed ``'<round>_<candidate>'``;
    otherwise it is nested as ``{round: {candidate: settings}}``, where the
    round key is taken from the first row of each array.
    """
    if isinstance(OptOut, str):
        if ext is True:
            ext = 'SurvOpt'
        newpathname = se.NewFolder(OptOut, ext = ext)
        CanDict, metrics = se.PickleLoad(newpathname + 'Out')
    else:
        CanDict, metrics = OptOut

    if isinstance(topcan, list) is False:
        topcan = [topcan]

    NewCanDict = {}

    if comb_rounds:
        for tx in topcan:
            for tc in tx:
                label = str(tc[0]) + '_' + str(tc[1])
                NewCanDict[label] = CanDict[tc[0]][tc[1]]
    else:
        for tx in topcan:
            round_key = tx[0, 0]
            per_round = {}
            for tc in tx:
                per_round[tc[1]] = CanDict[tc[0]][tc[1]]
            NewCanDict[round_key] = per_round

    return NewCanDict
|
|
395
|
+
|
|
396
|
+
|
|
397
|
+
def TopCanGetData(pathname, newpathname, topcan,
                  indiv_folders = True,
                  noget = None):
    """Copy the files of selected candidates into a new folder.

    Parameters
    ----------
    pathname : str
        Prefix of the per-round folders; round ``k`` is read from
        ``se.NewFolder(pathname + str(k))``.
    newpathname : str
        Destination, resolved via ``se.NewFolder``.
    topcan : array or list of arrays
        Rows of ``[round, candidate]``.
    indiv_folders : bool
        If truthy each candidate gets its own ``'<round>_<candidate>'``
        folder; otherwise files are copied side by side with a
        ``'<round>_<candidate>_'`` name prefix.
    noget : str or list of str, optional
        Files whose path contains any of these substrings are skipped.

    Files are matched as ``'<candidate>_*'`` inside the round folder and
    copied with ``shutil.copyfile``.  Returns ``None``.
    """
    if isinstance(topcan, list) is False: topcan = [topcan]
    if noget is not None and isinstance(noget, list) is False: noget = [noget]

    newpn = se.NewFolder(newpathname)

    # Resolve each round's folder once.  It is looked up per row because one
    # array may mix candidates from several rounds.
    round_folders = {}

    for tx in topcan:
        for tc in tx:

            rnd = tc[0]
            if rnd not in round_folders:
                round_folders[rnd] = se.NewFolder(pathname + str(rnd))
            pn_tx = round_folders[rnd]

            tc_prefix = str(tc[1]) + '_'
            targnames = glob.glob(pn_tx + tc_prefix + '*')

            if noget is not None:
                targnames = [tn for tn in targnames if not any(tg in tn for tg in noget)]

            new_tc_prefix = str(tc[0]) + '_' + str(tc[1])

            new_tc_prefix = se.NewFolder(newpn + new_tc_prefix) if indiv_folders else newpn + new_tc_prefix + '_'

            # Strip "<round folder><candidate>_" from each match and re-root it.
            strip = len(pn_tx) + len(tc_prefix)
            for t in targnames:
                shutil.copyfile(t, new_tc_prefix + t[strip:])

    return
|
|
428
|
+
|
|
429
|
+
|
|
430
|
+
|
|
431
|
+
|
|
432
|
+
|
|
433
|
+
#################################################
|
|
434
|
+
|
|
435
|
+
# ENSEMBLE STUFF #
|
|
436
|
+
|
|
437
|
+
def PredScorer(pred,
               out, out_std = None, out_weights = None, out_bind = None,
               split = None, metrics_mode = None,
               pred_modif_mode = None):
    """Score a prediction separately on every subset listed in ``split``.

    ``pred`` is first passed through ``pred_modif_mode[0]`` (with kwargs
    ``pred_modif_mode[1]``) if given.  For each index collection in ``split``
    the metric ``metrics_mode[0]`` is called with the matching slices of
    ``pred`` and ``out``, a deep copy of ``metrics_mode[1]``, and - where
    supplied - the slices of ``out_std`` / ``out_weights`` / ``out_bind``
    under the keywords ``std`` / ``weights`` / ``bind``.

    Returns a NumPy array with one entry per subset.
    """
    if pred_modif_mode is not None:
        modifier, modifier_kwargs = pred_modif_mode[0], pred_modif_mode[1]
        pred = modifier(pred, **modifier_kwargs)

    # Slice everything up front, one entry per subset.
    pred_parts = [pred[idx] for idx in split]
    out_parts = [out[idx] for idx in split]

    optional_parts = {}
    for keyword, source in (('std', out_std), ('weights', out_weights), ('bind', out_bind)):
        if source is None:
            optional_parts[keyword] = [None] * len(split)
        else:
            optional_parts[keyword] = [source[idx] for idx in split]

    scores = []
    for pos, (pred_part, out_part) in enumerate(zip(pred_parts, out_parts)):
        kwargs = copy.deepcopy(metrics_mode[1])
        for keyword in ('std', 'weights', 'bind'):
            piece = optional_parts[keyword][pos]
            if piece is not None:
                kwargs[keyword] = piece
        scores.append(metrics_mode[0](pred_part, out_part, **kwargs))

    return np.array(scores)  # should be a 1-dim vector
|
|
467
|
+
|
|
468
|
+
|
|
469
|
+
|
|
470
|
+
|
|
471
|
+
def remove_outliers(inp, std_cutoff = 2, ddof = 0, return_idx = True):
    """Drop values that lie too far from the mean.

    Parameters
    ----------
    inp : 1-dim sequence of numbers
    std_cutoff : float
        A value is kept when its absolute distance from the mean is strictly
        smaller than ``std_cutoff`` standard deviations.
    ddof : int
        Delta degrees of freedom for ``np.std``.
    return_idx : bool
        If truthy return the positions of the kept values, otherwise the
        kept values themselves.

    Returns
    -------
    numpy.ndarray
    """
    inp = np.array(inp)  # just in case
    deviation = abs(inp - np.mean(inp))
    keep_idx = np.where(deviation < std_cutoff * np.std(inp, ddof = ddof))[0]
    # Index exactly once; indexing again with the selected values would use
    # the data itself as positions.
    return keep_idx if return_idx else inp[keep_idx]
|
|
478
|
+
|
|
479
|
+
def EnsembleIdxs(pn, std_cutoff = 2, ddof = 1):
    """Return the positions of the non-outlier entries of a stored metrics file.

    ``pn`` is the path prefix of the folder; ``pn + 'Mets'`` is loaded with
    ``se.PickleLoad`` and the values of its entry ``0`` are handed to
    ``remove_outliers``.
    """
    # NOTE(review): presumably entry 0 holds one scalar score per member - confirm.
    stored = se.PickleLoad(pn + 'Mets')
    mets = stored[0]
    mets2 = list(mets.values())
    return remove_outliers(mets2, std_cutoff = std_cutoff, ddof = ddof, return_idx = True)
|
|
484
|
+
|
|
485
|
+
def EnsembleScorer(preds,
                   out, out_std = None, out_weights = None, out_bind = None,

                   split = None, metrics_mode = None,
                   score_on = 1,
                   std_cutoff = None, ddof = 1,
                   top = None, smallest = True,
                   pred_modif_mode = None,
                   pathname = None, return_extra = False):
    """Average a selection of member predictions and score the ensemble.

    Parameters
    ----------
    preds : sequence
        Member predictions, or paths to pickled predictions (detected by the
        first element being a ``str``).
    out, out_std, out_weights, out_bind, split, metrics_mode, pred_modif_mode
        Forwarded unchanged to ``PredScorer``.
    score_on : int
        Column of the per-member score matrix (i.e. which entry of ``split``)
        used to select members.
    std_cutoff, ddof
        If ``std_cutoff`` is given, members are selected with
        ``remove_outliers`` on that column.
    top, smallest
        Otherwise, if ``top`` is given, the ``top`` best members are kept.
        With neither option all members are kept.
    pathname : str, optional
        If given, kept indices, kept predictions, ensemble prediction and
        ensemble scores are pickled under ``pathname + 'ensemble_<name>'``.
    return_extra : bool
        If truthy return ``(scores, ensemble prediction, kept indices)``.

    Returns
    -------
    The ``PredScorer`` result for the averaged prediction (or the tuple above).
    """
    if isinstance(preds[0], str):
        preds = [se.PickleLoad(p) for p in preds]
    preds = np.array(preds)

    ##################################

    ps_args = dict(out = out, out_std = out_std,
                   out_weights = out_weights, out_bind = out_bind,
                   split = split, metrics_mode = metrics_mode,
                   pred_modif_mode = pred_modif_mode)

    # One row of scores per member.
    member_scores = []
    for pred in preds:
        member_scores.append(PredScorer(pred, **ps_args))
    mets_all = np.array(member_scores)

    if len(mets_all.shape) > 1:
        stop_mets = mets_all[:, score_on]
    else:
        stop_mets = mets_all

    if std_cutoff is not None:
        idx_keep = remove_outliers(stop_mets, std_cutoff = std_cutoff,
                                   ddof = ddof, return_idx = True)
    elif top is not None:
        step = 1 if smallest else -1
        ranked = np.argsort(stop_mets)[::step]
        idx_keep = ranked[:top]
    else:
        idx_keep = np.arange(len(stop_mets))

    preds_keep = preds[idx_keep]

    ##################################

    preds_ensemb = np.mean(preds_keep, axis = 0)
    preds_ensemb_scores = PredScorer(preds_ensemb, **ps_args)

    if pathname is not None:
        to_store = (('idx', idx_keep), ('preds_keep', preds_keep),
                    ('preds', preds_ensemb), ('scores', preds_ensemb_scores))
        for gn, g in to_store:
            se.PickleDump(g, pathname + 'ensemble_' + gn)

    if return_extra:
        return (preds_ensemb_scores, preds_ensemb, idx_keep)
    return preds_ensemb_scores
|
|
538
|
+
|
|
539
|
+
|
|
540
|
+
def Bootstrapper(inp, mode = [], iters = 100,
                 boots_idx = None,
                 pathname = None,

                 updates = 10, return_idx = False):
    """Evaluate a statistic on bootstrap resamples of ``inp``.

    Parameters
    ----------
    inp : sequence
        Observations; resampling is along the first axis.
    mode : [callable, dict]
        ``mode[0](resample, **mode[1])`` is evaluated once per resample.
    iters : int
        Number of resamples drawn when ``boots_idx`` is not given.
    boots_idx : sequence of index arrays, optional
        Pre-drawn resample indices; overrides ``iters``.
    pathname : str, optional
        If given, resample ``k`` additionally receives
        ``pathname=se.NewFolder(pathname + str(k))``.
    updates : int or None
        Print a progress line every ``updates`` resamples; ``None`` or ``0``
        disables it.
    return_idx : bool
        If truthy also return the resample indices.

    Returns
    -------
    numpy.ndarray, or ``(numpy.ndarray, indices)``
    """
    inp = np.array(inp)
    li = len(inp)
    rango = np.arange(li)

    if boots_idx is None:
        bs_idx = np.array([np.random.choice(rango, size = li, replace = True)
                           for _ in range(iters)])
    else:
        bs_idx = boots_idx

    func, func_kwargs = mode[0], mode[1]

    # Single pass: evaluate each resample exactly once and report progress
    # along the way.
    prods = []
    for nidx, idx in enumerate(bs_idx):
        if pathname is None:
            prods.append(func(inp[idx], **func_kwargs))
        else:
            prods.append(func(inp[idx], **func_kwargs,
                              pathname = se.NewFolder(pathname + str(nidx))))
        if updates and nidx % updates == 0:
            print(f'finished {nidx}')

    prods = np.array(prods)

    return (prods, bs_idx) if return_idx else prods
|
|
567
|
+
|
|
568
|
+
def PairwiseBootstrapper(inp1, inp2,
                         mode1 = [], mode2 = [],
                         iters = 10, updates = None):
    """Bootstrap two inputs with the same statistic and compare the results.

    Both inputs are passed through ``Bootstrapper`` with ``mode1`` (each call
    draws its own resamples); the two result arrays are then handed to
    ``mode2[0]`` together with the keyword arguments ``mode2[1]``.
    """
    shared = {'mode': mode1, 'iters': iters, 'updates': updates}

    prods1 = Bootstrapper(inp1, **shared)
    prods2 = Bootstrapper(inp2, **shared)

    compare, compare_kwargs = mode2[0], mode2[1]
    return compare(prods1, prods2, **compare_kwargs)
|
|
578
|
+
|
|
579
|
+
def BootstrapConfidenceInterval(inp, alpha = 0.95,
                                onesided = None,
                                axis = None):
    """Percentile confidence interval of bootstrap results.

    Parameters
    ----------
    inp : array-like
        Bootstrap statistics.
    alpha : float
        Confidence level (e.g. ``0.95``).
    onesided : {None, 'lesser', 'greater'}
        ``None`` gives the central interval; ``'lesser'`` spans from the
        minimum to the ``alpha`` percentile; ``'greater'`` spans from the
        ``1 - alpha`` percentile to the maximum.
    axis : int or None
        Axis along which percentiles are taken (``None`` flattens).

    Returns
    -------
    numpy.ndarray
        ``[lower, upper]`` stacked along the first axis.

    Raises
    ------
    ValueError
        If ``onesided`` is not one of the accepted values.
    """
    if onesided is None:
        alx = (1 - alpha) / 2
        p_lower = alx * 100
        p_higher = (1 - alx) * 100
    elif onesided == 'lesser':
        p_lower = 0
        p_higher = alpha * 100
    elif onesided == 'greater':
        p_lower = (1 - alpha) * 100
        p_higher = 100
    else:
        raise ValueError("onesided must be None, 'lesser' or 'greater'")

    # np.percentile does not need sorted input; pre-sorting along the last
    # axis would scramble multi-dimensional data when `axis` is another axis.
    return np.array([np.percentile(inp, p, axis = axis) for p in [p_lower, p_higher]])
|
|
597
|
+
|
|
598
|
+
def BootstrapStandardError(inp, ddof = 0, axis = None):
    """Standard error estimate: the standard deviation of the bootstrap results.

    ``ddof`` and ``axis`` are passed straight to ``np.std``.
    """
    spread = np.std(inp, axis = axis, ddof = ddof)
    return spread
|
|
601
|
+
|
|
602
|
+
|
|
603
|
+
|
|
604
|
+
|