poseigen-compass 0.0.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ Copyright 2025 Husam Abdulnabi
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
4
+
5
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
6
+
7
+ THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,21 @@
1
+ Metadata-Version: 2.3
2
+ Name: poseigen_compass
3
+ Version: 0.0.2
4
+ Summary: The basics for Poseigen software
5
+ License: MIT
6
+ Author: Husam Abdulnabi
7
+ Author-email: husam.abdulnabi@gmail.com
8
+ Maintainer: Husam Abdulnabi
9
+ Maintainer-email: husam.abdulnabi@gmail.com
10
+ Requires-Python: >=3.12
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: Topic :: Software Development
14
+ Classifier: Programming Language :: Python :: 3
15
+ Requires-Dist: numpy
16
+ Requires-Dist: pandas
17
+ Requires-Dist: poseigen_seaside
18
+ Project-URL: Homepage, https://github.com/husam94/poseigen_compass
19
+ Description-Content-Type: text/markdown
20
+
21
+ Will do
@@ -0,0 +1 @@
1
+ Will do
@@ -0,0 +1,58 @@
1
+
2
+
3
+ [build-system]
4
+
5
+ requires = ["setuptools",
6
+ "numpy",
7
+ "pandas",
8
+ "poseigen_seaside"
9
+
10
+ ]
11
+
12
+ [project]
13
+
14
+ name = "poseigen_compass"
15
+
16
+ version = "0.0.2"
17
+
18
+ requires-python = ">=3.12"
19
+
20
+ description = "The basics for Poseigen software"
21
+
22
+ readme = "README.md"
23
+
24
+ license = "MIT"
25
+
26
+ license-files = ["LICEN[CS]E.*"]
27
+
28
+ keywords = []
29
+
30
+ authors = [{ name = "Husam Abdulnabi", email = "husam.abdulnabi@gmail.com" }]
31
+
32
+ maintainers = [
33
+ { name = "Husam Abdulnabi", email = "husam.abdulnabi@gmail.com" },
34
+ ]
35
+
36
+ classifiers = [
37
+ # How mature is this project? Common values are
38
+ # 3 - Alpha
39
+ # 4 - Beta
40
+ # 5 - Production/Stable
41
+ "Development Status :: 3 - Alpha",
42
+
43
+ # Indicate who your project is intended for
44
+ "Intended Audience :: Developers",
45
+ "Topic :: Software Development",
46
+
47
+ "Programming Language :: Python :: 3",
48
+
49
+ ]
50
+
51
+ dependencies = [
52
+ "numpy",
53
+ "pandas",
54
+ "poseigen_seaside"
55
+ ]
56
+
57
+ [project.urls]
58
+ "Homepage" = "https://github.com/husam94/poseigen_compass"
@@ -0,0 +1,604 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+ import os
4
+ import copy
5
+ import glob, shutil
6
+ #-----------------------------
7
+ import poseigen_seaside.basics as se
8
+ import poseigen_seaside.metrics as mex
9
+ #-----------------------------
10
+
11
+
12
def VarTra_exp(inp, inverse = False):
    """Move values into log space, or back out of it.

    Args:
        inp: Scalar or array-like accepted by ``np.log`` / ``np.exp``.
        inverse: When it compares equal to ``False`` the natural log is
            applied; any other value applies the exponential instead.

    Returns:
        ``np.log(inp)`` in the forward direction, ``np.exp(inp)`` otherwise.
    """
    # The trick: callers that want the sample mirrored apply RevNumbers
    # themselves after calling this function.
    if inverse == False:
        return np.log(inp)
    return np.exp(inp)
15
+
16
def RevNumbers(inp, min = 0, max = 10):
    """Mirror ``inp`` within the interval described by ``min`` and ``max``.

    Args:
        inp: Number or array to mirror.
        min: Lower end of the interval.
        max: Upper end of the interval.

    Returns:
        ``max - inp + min``, so ``min`` maps to ``max`` and vice versa.
    """
    distance_from_top = max - inp
    return distance_from_top + min
18
+
19
def RandomCanGen(VarDict, num_can, num_gen = 10, configspace = False):
    """Draw random hyperparameter candidates (plain random sampling, no LHC).

    Args:
        VarDict: When ``configspace`` is ``False``, a mapping
            ``name -> spec`` where ``spec[1]`` is the variable kind.
            For ``'cat'``, ``spec[0]`` is the sequence of choices. Otherwise
            ``spec[0]`` holds ``(low, high)`` bounds, ``spec[1] == 'int'``
            rounds the draw to an integer, and an optional ``spec[2]`` is a
            transform called as ``f(x)`` and ``f(x, inverse=True)``.
            When ``configspace`` is not ``False``, an object exposing
            ``sample_configuration(n)``.
        num_can: Number of candidates wanted.
        num_gen: Oversampling factor; ``num_can * num_gen`` raw candidates
            are drawn so that enough unique ones remain.
        configspace: Selects the ``sample_configuration`` path.

    Returns:
        dict mapping candidate index to a ``{name: value}`` dict.
    """
    NewCanDict = {}

    if configspace is not False:
        configs = VarDict.sample_configuration(int(num_can))
        for i in range(num_can):
            NewCanDict[i] = dict(configs[i])
        return NewCanDict

    num_cangen = num_can * num_gen
    GenCanDict = {i: {} for i in range(num_cangen)}

    # One uniform [0, 1) column per variable; categorical variables ignore it.
    sample = np.random.uniform(size = (num_cangen, len(VarDict)))

    for ik, key in enumerate(VarDict):
        spec = VarDict[key]

        if spec[1] == 'cat':
            lvk = len(spec[0])
            c = np.random.choice(range(lvk), size = num_cangen)
            r = [spec[0][u] for u in c]

        else:
            n, m = spec[0][0], spec[0][1]
            transform = spec[2] if len(spec) > 2 else None

            if transform is not None:
                n, m = (transform(q) for q in (n, m))

            g = sample[:, ik] * (m - n) + n

            if transform is not None:
                # Keep an ndarray here: a plain list breaks both the
                # RevNumbers arithmetic and the .tolist() call below.
                g = np.array([transform(jj, inverse = True) for jj in g])

                if transform == VarTra_exp:
                    g = RevNumbers(g, spec[0][0], spec[0][1])

            r = np.round(g).astype(int) if spec[1] == 'int' else g
            r = r.tolist()

        for i in range(num_cangen):
            GenCanDict[i][key] = r[i]

    # Presumably drops duplicate candidates and renumbers the keys from 0 --
    # confirm against poseigen_seaside.basics.UniqueNestedDict.
    NewCanDict = se.UniqueNestedDict(GenCanDict, keepkey = False)
    NewCanDict = {k: v for k, v in NewCanDict.items() if k < num_can}

    return NewCanDict
72
+
73
+
74
+
75
+ ################################################################################
76
+
77
def StandardCanScorer(algo, algo_args, data, Splits = None, metrics_mode= [mex.AError, {}], add_metrics_modes = None,
                      pathname = None, returnmodel = False, Split = None):
    """Fit one candidate model and score its predictions.

    Args:
        algo: Callable building the model as ``algo(**algo_args)``; the
            model must provide ``fit`` and ``predict``.
        algo_args: Keyword arguments for ``algo``.
        data: Without a split, ``[train_parts, test_parts]`` where
            ``train_parts`` is passed to ``fit`` and ``test_parts[0]`` to
            ``predict``. With a split, a sequence of indexable objects that
            are each subset by the train and test indices.
        Splits: Legacy name for a single ``[train_idx, test_idx]`` pair;
            used only when ``Split`` is not given.
        metrics_mode: ``[metric, kwargs]``; the metric is called as
            ``metric(y_hat, *test_parts[1:], **kwargs)``.
        add_metrics_modes: Optional flat list whose first half holds extra
            metric functions and whose second half holds their kwargs, in
            matching order.
        pathname: If given, the fitted model is pickled there.
        returnmodel: Also return the fitted model.
        Split: A single ``[train_idx, test_idx]`` pair. This is the keyword
            ModelEvalHelper uses when handing a split to the scorer.

    Returns:
        The score, or ``[score, add_scores]`` when ``add_metrics_modes`` is
        given; wrapped as ``(score, model)`` when ``returnmodel`` is set.

    Raises:
        ValueError: If ``data`` is a dict and no split was supplied, since
            the scorer cannot tell which entry to use.
    """
    if Split is None:
        Split = Splits

    if Split is not None:
        dataz = [[d[Split[t]] for d in data] for t in [0, 1]]
    elif isinstance(data, dict):
        raise ValueError('StandardCanScorer cannot select an entry from dict data; '
                         'pass the [train, test] data for a single fold instead.')
    else:
        dataz = data

    m = algo(**algo_args)
    m.fit(*dataz[0])
    y_hat = m.predict(dataz[1][0])
    score = metrics_mode[0](y_hat, *dataz[1][1:], **metrics_mode[1])

    if pathname is not None:
        se.PickleDump(m, pathname)

    if add_metrics_modes is not None:
        lamm = len(add_metrics_modes) // 2
        # Each extra metric takes its kwargs from the second half of
        # add_metrics_modes, not from the primary metrics_mode.
        add_scores = [add_metrics_modes[amm](y_hat, *dataz[1][1:], **add_metrics_modes[amm + lamm])
                      for amm in range(lamm)]
        score = [score, add_scores]

    return score if returnmodel is False else (score, m)
102
+
103
+
104
+
105
+
106
+ ################################################################################
107
+
108
+
109
def ModelEvalHelper(c,
                    algo, CanDict, data, Splits = None, repeats = 1,
                    CS_mode = [StandardCanScorer, {'metrics_mode': [mex.AError, {'expo': 2}]}], CSDict = {},
                    lmd = 1, lsp = 1,
                    statusprints = True, pathname = None, savemodels = False, pn_Can = None):
    """Score one (candidate, split, repeat) combination.

    Data splitting itself is handled by the scorer; this helper only picks
    the split for the combination and forwards it.

    Args:
        c: ``(i, s, r)`` -- candidate key, split index, repeat index.
        algo: Model constructor forwarded to the scorer.
        CanDict: Mapping of candidate key to the candidate's argument dict.
        data: Forwarded to the scorer as ``data`` unless it is ``None``.
        Splits: Optional sequence of splits; ``Splits[s]`` is forwarded to
            the scorer under the keyword ``Split``.
        repeats, lmd, lsp: Totals used only in the progress messages.
        CS_mode: ``[scorer, kwargs]``; only the scorer is used here, the
            per-candidate kwargs come from ``CSDict``.
        CSDict: Mapping of candidate key to scorer kwargs. A missing key is
            treated as "no extra kwargs".
        statusprints: ``True`` prints every candidate item, a list prints
            only those keys, a falsy value disables printing.
        pathname: Prefix for saved models.
        savemodels: When ``True`` (and ``pathname`` is set) the scorer gets
            ``pathname + 'i_s_r'`` as its ``pathname``.
        pn_Can: Unused.

    Returns:
        Whatever the scorer returns.
    """
    i, s, r = c

    Split = Splits[s] if Splits is not None else None
    scorer_args = CSDict.get(i, {})

    if statusprints:
        if r == 0 and s == 0:
            if statusprints == True:
                itos = CanDict[i].items()
            else:
                # A requested key may have been moved from the candidate
                # into the scorer kwargs, so look there as a fallback.
                itos = [(k, CanDict[i][k] if k in CanDict[i] else scorer_args.get(k))
                        for k in statusprints]
            print(f' Model {i+1} of {lmd}: {itos}')
        print(f' cross val {s+1} of {lsp}, Repeat {r+1} of {repeats}')

    savepath = None
    if pathname is not None and savemodels == True:
        savepath = pathname + str(i) + '_' + str(s) + '_' + str(r)

    meh_args = {}
    if data is not None: meh_args['data'] = data
    if Split is not None: meh_args['Split'] = Split
    score = CS_mode[0](algo, CanDict[i], **meh_args,
                       pathname = savepath, **scorer_args)

    if statusprints: print(f" Model {i+1}, cross val {s+1}, Repeat {r+1}: {score}")

    return score
140
+
141
def CanEvaluator(algo, CanDict, data, Splits = None, repeats = 1, parallel = False,
                 CS_mode = [StandardCanScorer, {'metrics_mode': [mex.AError, {'expo': 2}]}], CS_vars = None,
                 pickup = False, statusprints = True, pathname = None, savemodels = False,
                 ext = None):
    """Score every candidate over every split and repeat.

    Args:
        algo: Model constructor forwarded to the scorer.
        CanDict: Mapping of candidate key to the candidate's argument dict.
            It is not modified.
        data: Data forwarded to the scorer. ``None`` is allowed (surrogate
            operation, e.g. YAHPO gym). If it is a dict and ``Splits`` is
            ``None``, its length sets the number of splits.
        Splits: A single ``[array, array]`` split or a list of such splits.
        repeats: Number of repeats per candidate and split.
        parallel: Accepted but ignored; parallel evaluation is not
            implemented.
        CS_mode: ``[scorer, base_kwargs]``.
        CS_vars: Candidate keys that belong to the scorer rather than the
            model; they are moved from the candidate into that candidate's
            scorer kwargs.
        pickup: Reuse scores stored in ``<folder>Mets.p`` when present.
            Forced off when ``pathname`` is ``None``.
        statusprints: Forwarded to ModelEvalHelper.
        pathname: Output location handed to ``se.NewFolder``.
        savemodels: Forwarded to ModelEvalHelper.
        ext: Folder extension; ``True`` selects ``'CanEval'``.

    Returns:
        Nested dict ``metrics[candidate][split][repeat] -> score``.
    """
    if ext is True: ext = 'CanEval'
    newpathname = se.NewFolder(pathname, ext = ext)

    if pathname is None: pickup = False
    else: pn_met = newpathname + 'Mets' + '.p'

    CDK = list(CanDict.keys())
    lmd = len(CDK)

    # Each candidate gets its own scorer-kwargs dict and its own stripped
    # copy, so neither CS_mode[1] (possibly the shared default argument) nor
    # the caller's CanDict is mutated.
    algo_cans = {}
    CSDict = {}
    for i in CDK:
        cand = CanDict[i]
        scorer_args = dict(CS_mode[1])
        if CS_vars is not None:
            scorer_args.update({v: cand[v] for v in CS_vars})
            cand = {k: val for k, val in cand.items() if k not in CS_vars}
        CSDict[i] = scorer_args
        algo_cans[i] = cand

    # Each split has to be in the format: [array, array]
    lsp = 1
    if Splits is not None:
        if isinstance(Splits[0], list) is False: Splits = [Splits]
        lsp = len(Splits)
    elif isinstance(data, dict): lsp = len(data)

    modelcombos = []
    newmetrics = {}
    for i in CDK:
        newmetrics[i] = {}
        for s in range(lsp):
            newmetrics[i][s] = {}
            for r in range(repeats):
                newmetrics[i][s][r] = None
                modelcombos.append([i, s, r])

    if pickup and os.path.isfile(pn_met):
        oldmetrics = se.PickleLoad(pn_met)
        for i in oldmetrics.keys():
            # Membership instead of ``i < lmd`` so that non-contiguous or
            # non-integer candidate keys are restored too.
            if i in newmetrics:
                for s in oldmetrics[i].keys():
                    for r in oldmetrics[i][s].keys():
                        newmetrics[i][s][r] = oldmetrics[i][s][r]

        print(newmetrics)

    MEH_args = {'algo': algo, 'CanDict': algo_cans, 'data': data, 'Splits': Splits, 'repeats': repeats,
                'CS_mode': CS_mode, 'CSDict': CSDict, 'lmd': lmd, 'lsp': lsp,
                'statusprints': statusprints, 'pathname': newpathname, 'savemodels': savemodels}

    # Sequential evaluation; results are written after every score so that
    # an interrupted run can be picked up again.
    for c in modelcombos:
        i, s, r = c
        if newmetrics[i][s][r] is None:
            newmetrics[i][s][r] = ModelEvalHelper(c, **MEH_args)
            if pathname is not None: se.PickleDump(newmetrics, pn_met)

    return newmetrics
214
+
215
+
216
+
217
+
218
+
219
def RandomOpt(algo, VarDict, data, Splits = None,
              budget = 20, repeats = 1,
              CS_mode = [StandardCanScorer, {'metrics_mode': [mex.AError, {'expo': 2}]}], CS_vars = None,
              RMG_args = {}, configspace = False,

              smallest = None,  # placeholder, currently unused

              pickup = False, statusprints = True, pathname = None, savemodels = False, ext = None):
    """Random-search optimisation: sample ``budget`` candidates and score them.

    Args:
        algo: Model constructor forwarded to CanEvaluator.
        VarDict: Search-space description handed to RandomCanGen.
        data, Splits, repeats, CS_mode, CS_vars, savemodels: Forwarded to
            CanEvaluator.
        budget: Number of candidates to evaluate.
        RMG_args: Extra keyword arguments for RandomCanGen. Not modified.
        configspace: Forwarded to RandomCanGen as ``configspace``.
        smallest: Unused placeholder.
        pickup: Continue from ``<folder>Out.p`` when it exists. Forced off
            when ``pathname`` is ``None``.
        statusprints: Truthy enables progress output, restricted to the
            variables that can actually vary.
        pathname: Output location handed to ``se.NewFolder``.
        ext: Folder extension; ``True`` selects ``'RandomOpt'``.

    Returns:
        ``[CanDict, metrics]``, both keyed by round; only round ``0`` is
        produced here.
    """
    if ext is True: ext = 'RandomOpt'
    newpathname = se.NewFolder(pathname, ext = ext)

    # Build the generator kwargs locally: updating RMG_args in place would
    # leak into the shared default argument and the caller's dict, and the
    # pickup path below would miss the configspace flag.
    gen_args = {**RMG_args, 'configspace': configspace}

    #--------------------------
    if pathname is not None:

        CS_args = copy.deepcopy(CS_mode[1])
        if 'trainer_args' in CS_args.keys():
            # These entries are blanked before the run arguments are stored.
            for xo in ['inps', 'out', 'out_bind']:
                CS_args['trainer_args'][xo] = None

        RO_args = {'VarDict': VarDict, 'CS_mode': [CS_mode[0], CS_args]}

        se.PickleDump(RO_args, newpathname + 'RO_args')
    #--------------------------

    if pathname is None: pickup = False
    else: pn_OO = newpathname + 'Out' + '.p'

    if pickup and os.path.isfile(pn_OO):

        CanDict, metrics = se.PickleLoad(pn_OO)

        # Option to change the number of candidates in the first round.
        lcdk = len(CanDict[0].keys())
        difo = budget - lcdk

        if difo < 0:
            for i in list(CanDict[0].keys()):
                if i >= budget: del CanDict[0][i]

        if difo > 0:  # does not guarantee unique candidates

            print('adding more')

            # Oversample by 20x for a better chance of unique candidates.
            moreCanDict = RandomCanGen(VarDict, difo * 20, **gen_args)

            cdo = copy.deepcopy(CanDict)
            cdo[0].update({k+lcdk: v for k,v in moreCanDict.items()})
            CanDict = se.UniqueNestedDict(cdo, keepkey = False)  # then resets numbering
            CanDict = {0: {k:v for k,v in CanDict[0].items() if k < budget}}

        for i in list(CanDict.keys()):
            if i > 0: del CanDict[i]

        se.PickleDump([CanDict, metrics], pn_OO)

    else:
        CanDict, metrics = {}, {}
        CanDict[0] = RandomCanGen(VarDict, budget, **gen_args)
        if newpathname is not None: se.PickleDump([CanDict, metrics], pn_OO)

    ######################################################

    # Status output skips categorical variables that have a single choice.
    sps = statusprints
    if statusprints:
        sps = []
        for key, val in VarDict.items():
            if val[1] == 'cat':
                if len(val[0]) > 1: sps.append(key)
            else: sps.append(key)

    ######################################################

    ro = 0

    pn_can = newpathname + str(ro) if pathname is not None else None

    metrics[ro] = CanEvaluator(algo, CanDict[ro], data,
                               Splits = Splits, repeats = repeats,
                               CS_vars = CS_vars, CS_mode = CS_mode,
                               pickup = pickup, statusprints = sps,
                               pathname = pn_can, savemodels = savemodels, ext = ext)

    OptOut = [CanDict, metrics]
    if newpathname is not None: se.PickleDump(OptOut, pn_OO)

    return OptOut
312
+
313
+
314
+
315
def Metrics2Flat(metrics):
    """Flatten a ``metrics[model][split][repeat]`` dict into an array.

    Args:
        metrics: Three-level nested dict of scores.

    Returns:
        ``np.ndarray`` of the scores in model, split, repeat iteration order.
    """
    flat = [
        per_repeat[r]
        for per_split in metrics.values()
        for per_repeat in per_split.values()
        for r in per_repeat
    ]
    return np.array(flat)
323
+
324
+
325
+
326
+ #############################################################################################
327
+
328
def TopCan(OptOut, num_top = 10,
           reduce_func = None, smallest = True,
           perround = False, ext = None):
    """Return the positions of the best candidate(s), overall or per round.

    Args:
        OptOut: ``[CanDict, metrics]`` with
            ``metrics[round][candidate][split][repeat]``, or a path string
            from which that pair is loaded (``<folder>Out``).
        num_top: Maximum number of candidates to return (per round when
            ``perround`` is set).
        reduce_func: Summarises all split/repeat scores of one candidate
            into one number. Defaults to ``np.nanmin`` when ``smallest``
            else ``np.nanmax``.
        smallest: Rank ascending (``True``) or descending (``False``).
        perround: Rank within each round instead of across all rounds.
        ext: Folder extension used when ``OptOut`` is a path; ``True``
            selects ``'SurvOpt'``.

    Returns:
        If ``perround`` is ``False``, an array whose rows are
        ``[round_position, candidate_position]``, best first. Otherwise a
        list with one such array per round. Positions are enumeration
        positions within ``metrics``, not the dict keys themselves.
        Candidates whose reduced score is NaN are always ranked last.
    """
    if isinstance(OptOut, str):

        if ext is True: ext = 'SurvOpt'
        newpathname = se.NewFolder(OptOut, ext = ext)
        CanDict, metrics = se.PickleLoad(newpathname + 'Out')

    else: CanDict, metrics = OptOut

    if reduce_func is None: reduce_func = np.nanmin if smallest else np.nanmax
    k = 1 if smallest else -1

    def _ranked(scores):
        # argsort puts NaN last; reversing for "largest first" would move it
        # to the front, so NaN entries are pushed back to the end.
        order = np.argsort(scores)[::k]
        if scores.dtype.kind in 'fc':
            missing = np.isnan(scores[order])
            order = np.concatenate([order[~missing], order[missing]])
        return order

    met_reduced = [np.array([reduce_func([metrics[k1][k2][k3][k4]
                                          for k3 in metrics[k1][k2].keys()
                                          for k4 in metrics[k1][k2][k3].keys()])
                             for k2 in metrics[k1].keys()]) for k1 in metrics.keys()]

    if perround is False:

        met_reduced_comb = np.concatenate(met_reduced)
        round_idxs = np.concatenate([np.repeat(im, len(met)) for im, met in enumerate(met_reduced)])
        can_idxs = np.concatenate([np.arange(len(met)) for met in met_reduced])
        idxs_to_use = _ranked(met_reduced_comb)[:num_top]
        top_idxs = np.array([x[idxs_to_use] for x in [round_idxs, can_idxs]]).T

    else:

        top_idxs = []
        for imet, met in enumerate(met_reduced):
            best = _ranked(met)[:num_top]
            # Repeat the round index to match the number selected: a round
            # may hold fewer than num_top candidates.
            top_idxs.append(np.array([np.repeat(imet, len(best)), best]).T)

    return top_idxs
374
+
375
+
376
def TopCanDict(OptOut, topcan, comb_rounds = True, ext = None):
    """Collect the candidate dicts addressed by ``topcan``.

    Args:
        OptOut: ``[CanDict, metrics]`` or a path string from which that
            pair is loaded (``<folder>Out``).
        topcan: Array of ``[round, candidate]`` rows, or a list of such
            arrays. Anything that is not a list is wrapped in one.
        comb_rounds: When truthy, return one flat dict keyed
            ``'<round>_<candidate>'``. Otherwise return one inner dict per
            array, keyed by the array's first round entry and then by
            candidate.
        ext: Folder extension used when ``OptOut`` is a path; ``True``
            selects ``'SurvOpt'``.

    Returns:
        dict of the selected candidates.
    """
    if not isinstance(OptOut, str):
        CanDict, metrics = OptOut
    else:
        if ext is True:
            ext = 'SurvOpt'
        newpathname = se.NewFolder(OptOut, ext = ext)
        CanDict, metrics = se.PickleLoad(newpathname + 'Out')

    if not isinstance(topcan, list):
        topcan = [topcan]

    NewCanDict = {}
    if comb_rounds:
        for tx in topcan:
            for tc in tx:
                label = str(tc[0]) + '_' + str(tc[1])
                NewCanDict[label] = CanDict[tc[0]][tc[1]]
    else:
        for tx in topcan:
            round_key = tx[0, 0]
            NewCanDict[round_key] = {tc[1]: CanDict[tc[0]][tc[1]] for tc in tx}

    return NewCanDict
395
+
396
+
397
def TopCanGetData(pathname, newpathname, topcan,
                  indiv_folders = True,
                  noget = None):
    """Copy the files belonging to the selected candidates into a new folder.

    Args:
        pathname: Prefix of the per-round source folders; the round index
            is appended to it.
        newpathname: Destination folder, passed to ``se.NewFolder``.
        topcan: Array of ``[round, candidate]`` rows, or a list of such
            arrays (one per round).
        indiv_folders: When truthy each candidate gets its own
            ``<round>_<candidate>`` folder; otherwise files land directly
            in the destination with that prefix plus ``'_'``.
        noget: Substring or list of substrings; source paths containing any
            of them are skipped.

    Returns:
        None.
    """
    if not isinstance(topcan, list):
        topcan = [topcan]

    # Normalising once up front is equivalent to doing it on first use.
    if noget is not None and not isinstance(noget, list):
        noget = [noget]

    newpn = se.NewFolder(newpathname)

    for tx in topcan:
        pn_tx = se.NewFolder(pathname + str(tx[0][0]))

        for tc in tx:
            tc_prefix = str(tc[1]) + '_'
            sources = glob.glob(pn_tx + tc_prefix + '*')

            if noget is not None:
                sources = [src for src in sources
                           if not any(tag in src for tag in noget)]

            stem = str(tc[0]) + '_' + str(tc[1])
            if indiv_folders:
                target_prefix = se.NewFolder(newpn + stem)
            else:
                target_prefix = newpn + stem + '_'

            # Drop the source folder and candidate prefix from each name.
            skip = len(pn_tx) + len(tc_prefix)
            for src in sources:
                shutil.copyfile(src, target_prefix + src[skip:])

    return
428
+
429
+
430
+
431
+
432
+
433
+ #################################################
434
+
435
+ # ENSEMBLE STUFF #
436
+
437
def PredScorer(pred,
               out, out_std = None, out_weights = None, out_bind = None,
               split = None, metrics_mode = None,
               pred_modif_mode = None):
    """Score predictions separately on every part of ``split``.

    Args:
        pred: Predictions, indexable by each element of ``split``.
        out: Targets, indexed the same way.
        out_std, out_weights, out_bind: Optional companions of ``out``;
            when given, their per-part slices are passed to the metric as
            ``std``, ``weights`` and ``bind``.
        split: Sequence of index sets, one per part.
        metrics_mode: ``[metric, kwargs]``; the metric is called as
            ``metric(pred_part, out_part, **kwargs)``.
        pred_modif_mode: Optional ``[func, kwargs]`` applied to ``pred``
            before splitting.

    Returns:
        1-D ``np.ndarray`` with one score per part (given scalar metrics).
    """
    if pred_modif_mode is not None:
        modifier, modifier_args = pred_modif_mode[0], pred_modif_mode[1]
        pred = modifier(pred, **modifier_args)

    def _per_part(values):
        # Missing companions become a run of None placeholders.
        if values is None:
            return [None] * len(split)
        return [values[part] for part in split]

    pred_parts = [pred[part] for part in split]
    out_parts = [out[part] for part in split]
    std_parts = _per_part(out_std)
    weight_parts = _per_part(out_weights)
    bind_parts = _per_part(out_bind)

    scores = []
    for p_part, o_part, std_part, w_part, b_part in zip(
            pred_parts, out_parts, std_parts, weight_parts, bind_parts):

        # Fresh copy so per-part extras never leak into the caller's kwargs.
        extra = copy.deepcopy(metrics_mode[1])
        for name, value in (('std', std_part), ('weights', w_part), ('bind', b_part)):
            if value is not None:
                extra[name] = value

        scores.append(metrics_mode[0](p_part, o_part, **extra))

    return np.array(scores)
467
+
468
+
469
+
470
+
471
def remove_outliers(inp, std_cutoff = 2, ddof = 0, return_idx = True):
    """Keep the entries lying within ``std_cutoff`` standard deviations of the mean.

    Args:
        inp: 1-D sequence of values.
        std_cutoff: Entries whose absolute deviation from the mean is
            strictly below ``std_cutoff * std`` are kept.
        ddof: Delta degrees of freedom passed to ``np.std``.
        return_idx: Return the positions of the kept entries when truthy,
            otherwise the kept values themselves.

    Returns:
        ``np.ndarray`` of indices or of values, depending on ``return_idx``.
    """
    inp = np.array(inp)  # just in case
    deviation = abs(inp - np.mean(inp))
    spread = np.std(inp, ddof = ddof)

    if spread == 0:
        # All values are identical, so none is an outlier; the strict "<"
        # below would otherwise discard every entry.
        keep = np.where(deviation == 0)[0]
    else:
        keep = np.where(deviation < std_cutoff * spread)[0]

    # Index exactly once: indexing with the values a second time raises for
    # floats and silently picks wrong entries for ints.
    return keep if return_idx else inp[keep]
478
+
479
def EnsembleIdxs(pn, std_cutoff = 2, ddof = 1):
    """Load stored metrics and return the positions of the non-outlier entries.

    Args:
        pn: Path to the folder; ``'Mets'`` is appended to locate the pickle.
        std_cutoff: Forwarded to ``remove_outliers``.
        ddof: Forwarded to ``remove_outliers``.

    Returns:
        Index array produced by ``remove_outliers`` over the values of the
        first entry (key ``0``) of the loaded object.
    """
    stored = se.PickleLoad(pn + 'Mets')
    first_entry = stored[0]
    scores = list(first_entry.values())
    return remove_outliers(scores, std_cutoff = std_cutoff, ddof = ddof, return_idx = True)
484
+
485
def EnsembleScorer(preds,
                   out, out_std = None, out_weights = None, out_bind = None,

                   split = None, metrics_mode = None,
                   score_on = 1,
                   std_cutoff = None, ddof = 1,
                   top = None, smallest = True,
                   pred_modif_mode = None,
                   pathname = None, return_extra = False):
    """Average a selection of member predictions and score the average.

    Args:
        preds: List of per-model predictions, or list of pickle paths to
            load them from (decided by the type of the first element).
        out, out_std, out_weights, out_bind, metrics_mode, pred_modif_mode:
            Forwarded unchanged to ``PredScorer``.
        split: Index sets forwarded to ``PredScorer``; per the original
            note it is ``[train, stoppage, ...]``.
        score_on: Column of the per-model score matrix used for member
            selection (``1`` is the stoppage set under the layout above).
        std_cutoff: If given, members are selected with ``remove_outliers``
            on the selection scores. Takes precedence over ``top``.
        ddof: Forwarded to ``remove_outliers``.
        top: If given (and ``std_cutoff`` is ``None``), keep this many best
            members.
        smallest: Whether "best" means the smallest selection score.
        pathname: If given, the kept indices, kept predictions, ensemble
            prediction and ensemble scores are pickled under
            ``pathname + 'ensemble_<name>'``.
        return_extra: Also return the ensemble prediction and kept indices.

    Returns:
        The ensemble scores, or ``(scores, ensemble_pred, idx_keep)`` when
        ``return_extra`` is truthy.
    """
    if isinstance(preds[0], str):
        preds = [se.PickleLoad(p) for p in preds]
    preds = np.array(preds)

    ps_args = dict(out = out, out_std = out_std,
                   out_weights = out_weights, out_bind = out_bind,
                   split = split, metrics_mode = metrics_mode,
                   pred_modif_mode = pred_modif_mode)

    # One row of split-wise scores per ensemble member.
    mets_all = np.array([PredScorer(member, **ps_args) for member in preds])

    if len(mets_all.shape) > 1:
        stop_mets = mets_all[:, score_on]
    else:
        stop_mets = mets_all

    if std_cutoff is not None:
        idx_keep = remove_outliers(stop_mets, std_cutoff = std_cutoff,
                                   ddof = ddof, return_idx = True)
    elif top is not None:
        direction = 1 if smallest else -1
        idx_keep = np.argsort(stop_mets)[::direction][:top]
    else:
        idx_keep = np.arange(len(stop_mets))

    preds_keep = preds[idx_keep]

    preds_ensemb = np.mean(preds_keep, axis = 0)
    preds_ensemb_scores = PredScorer(preds_ensemb, **ps_args)

    if pathname is not None:
        artefacts = (('idx', idx_keep), ('preds_keep', preds_keep),
                     ('preds', preds_ensemb), ('scores', preds_ensemb_scores))
        for label, payload in artefacts:
            se.PickleDump(payload, pathname + 'ensemble_' + label)

    if return_extra:
        return (preds_ensemb_scores, preds_ensemb, idx_keep)
    return preds_ensemb_scores
538
+
539
+
540
def Bootstrapper(inp, mode = [], iters = 100,
                 boots_idx = None,
                 pathname = None,

                 updates = 10, return_idx = False):
    """Evaluate a statistic on bootstrap resamples of ``inp``.

    Args:
        inp: Data to resample along its first axis.
        mode: ``[func, kwargs]``; each resample is evaluated as
            ``func(resample, **kwargs)``.
        iters: Number of resamples to draw when ``boots_idx`` is ``None``.
        boots_idx: Optional pre-drawn index sets, one per resample, used
            instead of drawing new ones.
        pathname: If given, each call additionally receives
            ``pathname=se.NewFolder(pathname + str(k))`` for resample ``k``.
        updates: Print a progress line every ``updates`` resamples;
            ``None`` or ``0`` disables it.
        return_idx: Also return the index sets that were used.

    Returns:
        ``np.ndarray`` of the statistic per resample, or
        ``(statistics, indices)`` when ``return_idx`` is truthy.
    """
    inp = np.array(inp)
    li = len(inp)
    rango = np.arange(li)

    if boots_idx is None:
        bs_idx = np.array([np.random.choice(rango, size = li, replace = True)
                           for _ in np.arange(iters)])
    else:
        bs_idx = boots_idx

    # Single pass: each statistic is computed exactly once, with the
    # per-resample folder when one is requested.
    prods = []
    for nidx, idx in enumerate(bs_idx):
        if pathname is None:
            prods.append(mode[0](inp[idx], **mode[1]))
        else:
            prods.append(mode[0](inp[idx], **mode[1],
                                 pathname = se.NewFolder(pathname + str(nidx))))
        if updates and nidx % updates == 0:
            print(f'finished {nidx}')

    prods = np.array(prods)

    return (prods, bs_idx) if return_idx else prods
567
+
568
def PairwiseBootstrapper(inp1, inp2,
                         mode1 = [], mode2 = [],
                         iters = 10, updates = None):
    """Bootstrap two inputs with the same statistic and compare the results.

    Args:
        inp1, inp2: The two inputs; each is bootstrapped by its own call
            to ``Bootstrapper``.
        mode1: ``[func, kwargs]`` statistic applied to both inputs.
        mode2: ``[func, kwargs]`` comparison, called as
            ``func(stats1, stats2, **kwargs)``.
        iters: Number of resamples per input.
        updates: Progress interval forwarded to ``Bootstrapper``.

    Returns:
        Whatever the comparison function returns.
    """
    shared = {'mode': mode1, 'iters': iters, 'updates': updates}
    stats_first = Bootstrapper(inp1, **shared)
    stats_second = Bootstrapper(inp2, **shared)

    compare, compare_args = mode2[0], mode2[1]
    return compare(stats_first, stats_second, **compare_args)
578
+
579
def BootstrapConfidenceInterval(inp, alpha = 0.95,
                                onesided = None,
                                axis = None):
    """Percentile confidence interval of bootstrap statistics.

    Args:
        inp: Bootstrap statistics (array-like).
        alpha: Coverage of the interval, e.g. ``0.95``.
        onesided: ``None`` for a two-sided interval; ``'lesser'`` for
            percentiles ``[0, alpha*100]``; ``'greater'`` for
            ``[(1-alpha)*100, 100]``.
        axis: Axis along which percentiles are taken (``None`` flattens).

    Returns:
        ``np.ndarray`` whose first entry is the lower bound and whose
        second entry is the upper bound.

    Raises:
        ValueError: If ``onesided`` is not one of the accepted values.
    """
    if onesided is None:
        alx = (1 - alpha) / 2
        p_lower, p_higher = alx * 100, (1 - alx) * 100
    elif onesided == 'lesser':
        p_lower, p_higher = 0, alpha * 100
    elif onesided == 'greater':
        p_lower, p_higher = (1 - alpha) * 100, 100
    else:
        raise ValueError(f"onesided must be None, 'lesser' or 'greater', got {onesided!r}")

    # No pre-sorting: np.percentile does not need it, and np.sort works on
    # the last axis, which scrambles the data when axis is any other axis.
    return np.array([np.percentile(inp, p, axis = axis) for p in [p_lower, p_higher]])
597
+
598
def BootstrapStandardError(inp, ddof = 0, axis = None):
    """Standard error estimate: the standard deviation of bootstrap statistics.

    Args:
        inp: Bootstrap statistics (array-like).
        ddof: Delta degrees of freedom passed to ``np.std``.
        axis: Axis along which the deviation is computed (``None`` flattens).

    Returns:
        The result of ``np.std`` over ``inp``.
    """
    spread = np.std(inp, axis = axis, ddof = ddof)
    return spread
601
+
602
+
603
+
604
+