StackGP 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
StackGP/StackGP.py ADDED
@@ -0,0 +1,998 @@
1
+ #!/usr/bin/env python
2
+ # coding: utf-8
3
+
4
+ import random
5
+ import matplotlib.pyplot as plt
6
+ import numpy as np
7
+ import math
8
+ import copy
9
+ import sys
10
+ from scipy.stats import pearsonr # for computing correlation
11
+ from functools import reduce #for flattening lists
12
+ from operator import concat #for flattening lists
13
+ from scipy.stats import trim_mean # for ensemble evaluation
14
+ from scipy.stats import differential_entropy
15
+ import warnings
16
+ import time
17
+ import dill
18
+ import os
19
+ from sklearn.cluster import KMeans #for clustering in ensemble definition
20
+ from scipy.optimize import minimize #for uncertainty maximization
21
+ from sympy import symbols
22
+ warnings.filterwarnings('ignore', '.*invalid value.*' )
23
+ warnings.filterwarnings('ignore', '.*overflow.*' )
24
+ warnings.filterwarnings('ignore', '.*divide by.*' )
25
+ warnings.filterwarnings('ignore', '.*is constant.*' )
26
+ warnings.filterwarnings('ignore', '.*nearly constant.*' )
27
+ warnings.filterwarnings('ignore', '.*Polyfit may be.*' )
28
+ warnings.filterwarnings('ignore', '.*Number of.*')
29
def protectDiv(a, b):
    """Protected division: turn division by zero into NaN instead of raising.

    Scalar zero divisors make the whole result NaN; zero entries in an array
    divisor are replaced by NaN so only those positions become NaN.
    Uses isinstance so any numpy float scalar (float32/float64/...) is covered,
    not just np.float64 as in the original type()== checks.
    """
    if isinstance(b, (int, float, np.floating)) and b == 0:
        return a / math.nan
    if isinstance(b, np.ndarray) and (0 in b):
        return a / np.where(b == 0, math.nan, b)
    return a / b
36
def add(a, b):
    """GP primitive: scalar/elementwise sum a + b."""
    return a + b
38
def sub(a, b):
    """GP primitive: scalar/elementwise difference a - b."""
    return a - b
40
def mult(a, b):
    """GP primitive: scalar/elementwise product a * b."""
    return a * b
42
def exp(a):
    """GP primitive: elementwise e**a via numpy."""
    return np.exp(a)
44
+ # def sine(a,b):
45
+ # return np.sin(a)
46
def power(a, b):
    """GP primitive: a raised to the power b."""
    return a ** b
48
def sqrt(a):
    """GP primitive: elementwise square root (negative input gives NaN with a warning suppressed at module level)."""
    return np.sqrt(a)
50
def sqrd(a):
    """GP primitive: square of a."""
    return a ** 2
52
def inv(a):
    """GP primitive: multiplicative inverse 1/a, computed in float so integer
    input divides exactly (0 becomes inf; the warning is filtered at module level)."""
    return np.array(a).astype(float) ** (-1)
54
def sin(a):
    """GP primitive: elementwise sine."""
    return np.sin(a)
56
def cos(a):
    """GP primitive: elementwise cosine."""
    return np.cos(a)
58
def tan(a):
    """GP primitive: elementwise tangent."""
    return np.tan(a)
60
def arccos(a):
    """GP primitive: elementwise inverse cosine (NaN outside [-1, 1], warning suppressed)."""
    return np.arccos(a)
62
def arcsin(a):
    """GP primitive: elementwise inverse sine (NaN outside [-1, 1], warning suppressed)."""
    return np.arcsin(a)
64
def arctan(a):
    """GP primitive: elementwise inverse tangent."""
    return np.arctan(a)
66
def tanh(a):
    """GP primitive: elementwise hyperbolic tangent."""
    return np.tanh(a)
68
def log(a):
    """GP primitive: elementwise natural logarithm (non-positive input gives NaN/-inf, warnings suppressed)."""
    return np.log(a)
70
+
71
def defaultOps():
    """Default operator pool: arithmetic primitives plus six "pop" pseudo-ops
    (the pops bias random stacks toward consuming more leaves)."""
    return [protectDiv, add, sub, mult, exp, sqrd, sqrt, inv] + ["pop"] * 6
73
def allOps():
    """Extended operator pool: the default arithmetic set plus trig, hyperbolic
    and log primitives, and ten "pop" pseudo-ops."""
    return [protectDiv, add, sub, mult, exp, sqrd, sqrt, inv,
            cos, sin, tan, arccos, arcsin, arctan, tanh, log] + ["pop"] * 10
75
def randomInt(a=-3, b=3):
    """Random integer constant generator, uniform on the inclusive range [a, b]."""
    return random.randint(a, b)
77
def defaultConst():
    """Default constant pool: pi, e, and the random integer/real generators
    (generators are invoked when a model is materialized)."""
    return [np.pi, np.e, randomInt, ranReal]
79
def ranReal(a=20, b=-10):
    """Random real constant generator: uniform on [-b, a - b), i.e. [10, 30)
    with the defaults.

    NOTE(review): the `- b` with a default of b=-10 looks suspicious —
    `random()*a + b` would give [-10, 10); confirm the intended range before
    changing anything, since existing evolved constants depend on it.
    """
    return random.random() * a - b
81
+
82
+
83
+ ############################
84
+ #Data Subsampling Methods
85
+ ############################
86
def randomSubsample(x, y):
    """Draw a random subsample of roughly len(y)**(3/5) records (at least 3)
    without replacement; x is (variables x records), y is the response."""
    sampleSize = max(int(np.ceil(len(y) ** (3 / 5))), 3)
    idx = np.random.choice(range(x.shape[1]), sampleSize, replace=False)
    sampledX = np.array([row[idx] for row in x])
    return sampledX, y[idx]
90
+
91
def generationProportionalSample(x, y, generation=100, generations=100):
    """Subsample whose size grows with generation/generations (at least 3
    records), so later generations see more of the data."""
    fraction = (generation / generations) ** (3 / 5)
    sampleSize = max(int(np.ceil(len(y) * fraction)), 3)
    idx = np.random.choice(range(x.shape[1]), sampleSize, replace=False)
    sampledX = np.array([row[idx] for row in x])
    return sampledX, y[idx]
95
+
96
+ import inspect
97
def getArity(func):
    """Number of arguments func consumes; the "pop" pseudo-op counts as 1."""
    if func == "pop":
        return 1
    return len(inspect.signature(func).parameters)

getArity.__doc__ = "getArity(func) takes a function and returns the function arity"
103
def modelArity(model):
    """Total number of leaves (variables/constants) the model's operator stack consumes."""
    return 1 + sum(getArity(op) - 1 for op in model[0])

modelArity.__doc__ = "modelArity(model) returns the total arity of a model"
107
def listArity(data):
    """Arity of evaluating a list of operators; 0 for an empty list."""
    if len(data) == 0:
        return 0
    return 1 + sum(getArity(op) - 1 for op in data)

listArity.__doc__ = "listArity(list) returns the arity of evaluating a list of operators"
112
def buildEmptyModel():
    """Fresh empty model: [operatorStack, varConstStack, qualityMetrics]."""
    return [[], [], []]

buildEmptyModel.__doc__ = "buildEmptyModel() takes no inputs and generates an empty GP model"
115
def variableSelect(num): #Function that creates a function to select a specific variable
    # Returns a selector f(variables) -> variables[num].
    # NOTE: this must remain a *lambda* — generateRandomModel and mutate use
    # `callable(i) and i.__name__ != '<lambda>'` to distinguish variable
    # selectors from constant generators, so replacing this with a def or
    # operator.itemgetter would change their behavior.
    return lambda variables: variables[num]
variableSelect.__doc__ = "variableSelect(n) is a function that creates a function to select the nth variable"
118
def modelToListForm(model):
    """In place: convert the operator stack from an ndarray to a plain list
    (needed before list-equality operations such as list.remove)."""
    model[0] = model[0].tolist()
120
def modelRestoreForm(model):
    """In place: convert the operator stack back to an object-dtype ndarray."""
    model[0] = np.array(model[0], dtype=object)
122
+
123
def generateRandomModel(variables,ops,const,maxLength): #Generates a random GP model
    """Build one random model [opStack, varConstStack, qualityMetrics]."""
    prog = buildEmptyModel() #Generate an empty model with correct structure
    varChoices=[variableSelect(i) for i in range(variables)]+const #All variable and constants choices
    prog[0]=np.array(np.random.choice(ops,random.randint(1,maxLength)),dtype=object) #Choose random operators
    countVars=modelArity(prog) #Count how many variables/constants are needed
    prog[1]=np.random.choice(varChoices,countVars) #Choose random variables/constants
    # Constant *generators* (named functions like randomInt/ranReal) are called
    # now; variable selectors are lambdas and stay callable until evaluation.
    prog[1]=[i() if (callable(i) and i.__name__!='<lambda>' )else i for i in prog[1]] #If function then evaluate
    return prog
generateRandomModel.__doc__ = "generateRandomModel() takes as input the variables, operators, constants, and max program length and returns a random program"
132
def initializeGPModels(variables,ops=defaultOps(),const=defaultConst(),numberOfModels=100,maxLength=10): # generate random linear program
    """Return numberOfModels randomly generated models.

    Each model stores [operatorStack, varConstStack, qualityMetrics].
    Note: ops/const defaults are evaluated once at import time (shared lists);
    they are never mutated here, so that is safe.
    The original assigned an unused local `prog=[[],[],[]]`; removed.
    """
    return [generateRandomModel(variables, ops, const, maxLength) for _ in range(numberOfModels)]
initializeGPModels.__doc__ = "initializeGPModels(countOfVariables, operators, constants, numberOfModels=100, maxLength=10) returns a set of randomly generated models"
140
+
141
def reverseList(data):
    """Return a reversed shallow copy of data."""
    return list(reversed(data))

reverseList.__doc__ = "reverseList(data) returns the data list reversed"
144
def varReplace(data, variables):
    """Replace every callable entry (a variable selector) with its value on
    `variables`; non-callable entries (constants) pass through unchanged."""
    return [entry(variables) if callable(entry) else entry for entry in data]

varReplace.__doc__ = "varReplace(data,variables) replaces references to variables in data with actual values"
147
def inputLen(data):
    """Number of records in a data set: length of the first variable's vector,
    or 1 when the first entry is a scalar."""
    first = data[0]
    if type(first) in (list, np.ndarray):
        return len(first)
    return 1

inputLen.__doc__ = "inputLen(data) determines the number of data records in a data set"
154
def varCount(data):
    """Number of variables (rows) in a data set."""
    return len(data)

varCount.__doc__ = "varCount(data) determines the number of variables in a data set"
157
def evaluateGPModel(model,inputData): #Evaluates a model numerically
    """Numerically evaluate model on inputData (variables x records).

    Returns a per-record ndarray, or a scalar broadcast to an array when the
    model is constant and there is more than one record.
    """
    response=evModHelper(model[1],model[0],[],np.array(inputData).astype(float))[2][0]
    # Constant models yield a scalar; replicate it so callers always get one
    # value per record.
    if not type(response)==np.ndarray and inputLen(inputData)>1:
        response=np.array([response for i in range(inputLen(inputData))])
    return response
evaluateGPModel.__doc__ = "evaluateGPModel(model,data) numerically evaluates a model using the data stored in inputData"
163
def evModHelper(varStack,opStack,tempStack,data): #Recursive helper function for evaluateGPModel
    """Evaluate opStack against varStack, accumulating results on tempStack.

    Returns [varStack, opStack, tempStack] after all operators are consumed;
    the model's final value is tempStack[0].
    """
    stack1=varStack   # variables/constants still unconsumed
    stack2=opStack    # operators still to apply
    stack3=tempStack  # intermediate results (top of stack at index 0)

    if len(stack2)==0:
        # NOTE(review): this base case returns [temp, ops, vars] while the main
        # path returns [vars, ops, temp]; callers index [2][0], which is only
        # consistent because they never pass an empty opStack — confirm before
        # relying on this branch.
        return [stack3,stack2,stack1]
    op=stack2[0]
    stack2=stack2[1:]

    if callable(op):

        patt=getArity(op)
        # Pull operands from the variable stack until the operator's arity is met.
        while patt>len(stack3):
            stack3=[stack1[0]]+stack3
            stack1=stack1[1:]
        try:
            # Operands were pushed most-recent-first; reverse to call order and
            # substitute variable selectors with actual data.
            temp=op(*varReplace(reverseList(stack3[:patt]),data))
        except TypeError:
            print("stack3: ", stack3, " patt: ", patt, " data: ", data)
            temp=np.nan
        except OverflowError:
            temp=np.nan
        stack3=stack3[patt:]
        stack3=[temp]+stack3

    else:
        # "pop" pseudo-op: move one variable/constant onto the temp stack.
        if len(stack1)>0:
            stack3=varReplace([stack1[0]],data)+stack3
            stack1=stack1[1:]
    if len(stack2)>0:
        stack1,stack2,stack3=evModHelper(stack1,stack2,stack3,data)

    return [stack1,stack2,stack3]
evModHelper.__doc__ = "evModHelper(varStack,opStack,tempStack,data) is a helper function for evaluateGPModel"
198
def fitness(prog,data,response): # Fitness function using correlation
    """Return 1 - R^2 between the model's prediction and response (lower is better).

    NaN is returned for indeterminate (NaN/inf) or constant predictions so the
    model can be culled; 1 is returned when the correlation is undefined.
    """
    predicted=evaluateGPModel(prog,np.array(data))
    # Broadcast a scalar prediction (constant model) to one value per record.
    if type(predicted)!=list and type(predicted)!=np.ndarray:
        predicted=np.array([predicted for i in range(inputLen(data))])
    try:
        if np.isnan(predicted).any() or np.isinf(predicted).any():
            return np.nan
    except TypeError:
        #print(predicted)
        return np.nan
    except OverflowError:
        return np.nan
    # float32 cast also rejects values too large for single precision; constant
    # predictions have no defined correlation.
    if (not all(np.isfinite(np.array(predicted,dtype=np.float32)))) or np.all(predicted==predicted[0]):
        return np.nan
    try:
        fit=1-pearsonr(predicted,np.array(response))[0]**2 # 1-R^2
    except ValueError:
        return 1
    if math.isnan(fit):
        return 1 # If nan return 1 as fitness
    return fit # Else return actual fitness 1-R^2
fitness.__doc__ = "fitness(program,data,response) returns the 1-R^2 value of a model"
220
def stackGPModelComplexity(model, *args):
    """Model complexity: operator count plus leaf count, excluding "pop"
    pseudo-ops (extra args are accepted so this can serve as a quality metric)."""
    opList = model[0].tolist()
    return len(opList) + len(model[1]) - opList.count("pop")

stackGPModelComplexity.__doc__ = "stackGPModelComplexity(model) returns the complexity of the model"
223
def setModelQuality(model, inputData, response, modelEvaluationMetrics=[fitness, stackGPModelComplexity]):
    """In place: fill model[2] with each metric evaluated on (model, inputData,
    response). The default metric list is shared but never mutated."""
    model[2] = [metric(model, inputData, response) for metric in modelEvaluationMetrics]

setModelQuality.__doc__ = "setModelQuality(model, inputdata, response, metrics=[r2,size]) is an inplace operator that sets a models quality"
227
def stackPass(model,pt):
    """Simulate evaluating the first pt operators of model[0].

    Returns [stack1, stack2]: the variables/constants not yet consumed, and the
    most recently consumed ones (reversed) still live on the temp stack at that
    point. Helper for fragmentVariables.
    """
    i=0
    t=0           # size of the temp (result) stack so far
    p=0           # number of variables/constants consumed so far
    s=model[0]
    if i <pt:
        t+=1      # the first operator leaves one result on the temp stack
    while i<pt:
        if s[i]=="pop":
            t+=1
            p+=1
        else:
            p+=max(0,getArity(s[i])-t)       # leaves pulled beyond the temp stack
            t=max(1,t-getArity(s[i])+1)      # operator result replaces its inputs
        i+=1
    stack1=model[1][p:]
    stack2=reverseList(model[1][:p])[:t+1]
    return [stack1,stack2]
245
def stackGrab(stack1, stack2, num):
    """Take num items, preferring stack2 (recently consumed values) and then
    stack1 (unconsumed variables).

    Returns [taken, remaining1, remaining2]; the inputs are not mutated.
    """
    remaining1 = copy.deepcopy(stack1)
    remaining2 = copy.deepcopy(stack2)
    if len(stack2) < num:
        # Exhaust stack2, then top up from the front of stack1.
        taken = stack2 + stack1[:(num - len(stack2))]
        remaining1 = remaining1[num - len(remaining2):]
        remaining2 = []
    else:
        taken = stack2[:num]
        remaining2 = remaining2[num:]
    return [taken, remaining1, remaining2]
257
def fragmentVariables(model,pts):
    """Collect the variables/constants consumed by operators pts[0]..pts[1]
    (inclusive) of model's operator stack."""
    stack1,stack2=stackPass(model,pts[0])
    opStack=model[0]
    newStack=[]
    i=pts[0]
    while i<=pts[1]:
        if opStack[i]=="pop" and len(stack1)>0:
            # A pop just shifts one unconsumed variable onto the temp stack.
            stack2=[stack1[0]]+stack2
            stack1=stack1[1:]
        else:
            # The first operator of a fragment starting at the very top consumes
            # its full arity; later operators reuse one result already on the stack.
            if len(newStack)==0 and pts[0]==0:
                tStack,stack1,stack2=stackGrab(stack1,stack2,getArity(opStack[i]))
            else:
                tStack,stack1,stack2=stackGrab(stack1,stack2,getArity(opStack[i])-1)
            newStack=newStack+tStack
        i+=1
    return newStack
274
+
275
def recombination2pt(model1,model2): #2 point recombination
    """Two-point crossover: swap the operator segments between two random cut
    points of each parent, carrying the matching slice of the variable stacks,
    and return [child1, child2]."""
    pts1=np.sort(random.sample(range(0,len(model1[0])+1),2))
    pts2=np.sort(random.sample(range(0,len(model2[0])+1),2))
    #pts1=[4,5]
    #pts2=[2,4]
    #pts1=[0,3]
    #pts2=[1,3]
    #print(pts1,pts2)
    child1=buildEmptyModel()
    child2=buildEmptyModel()

    parent1=copy.deepcopy(model1)
    parent2=copy.deepcopy(model2)
    parent1[0]=np.array(parent1[0],dtype=object).tolist()
    parent2[0]=np.array(parent2[0],dtype=object).tolist()

    # Operator stacks: prefix from one parent, middle from the other, suffix back.
    child1[0]=np.array(parent1[0][0:pts1[0]]+parent2[0][pts2[0]:pts2[1]]+parent1[0][pts1[1]:],dtype=object)
    child2[0]=np.array(parent2[0][0:pts2[0]]+parent1[0][pts1[0]:pts1[1]]+parent2[0][pts2[1]:],dtype=object)

    # varPts1 = [vars used by parent1 prefix, by parent2 prefix,
    #            by parent2 middle, by parent1 middle] (via listArity).
    varPts1=[listArity(parent1[0][:(pts1[0])])+0,listArity(parent2[0][:(pts2[0])])+0,listArity(parent2[0][pts2[0]:pts2[1]]),listArity(parent1[0][pts1[0]:pts1[1]])]
    # A cut at position 0 has no preceding operator, so the slice boundary
    # shifts by one extra leaf.
    if pts1[0]==0:
        varPts1[0]+=1
    if pts2[0]==0:
        varPts1[1]+=1
    child1[1]=parent1[1][:varPts1[0]]+parent2[1][varPts1[1]:(varPts1[1]+varPts1[2]-1)]+parent1[1][(varPts1[0]+varPts1[3]-1):]

    varPts2=[listArity(parent2[0][:(pts2[0])])+0,listArity(parent1[0][:(pts1[0])])+0,listArity(parent1[0][pts1[0]:pts1[1]]),listArity(parent2[0][pts2[0]:pts2[1]])]
    if pts1[0]==0:
        varPts2[1]+=1
    if pts2[0]==0:
        varPts2[0]+=1
    child2[1]=parent2[1][:varPts2[0]]+parent1[1][varPts2[1]:(varPts2[1]+varPts2[2]-1)]+parent2[1][(varPts2[0]+varPts2[3]-1):]
    #print(varPts1,varPts2)

    return [child1,child2]
recombination2pt.__doc__ = "recombination2pt(model1,model2) does 2 point crossover and returns two children models"
311
+
312
def get_numeric_indices(l):
    """Indices of entries that are exactly int or float (bools and numpy
    scalars are deliberately excluded by the exact type check)."""
    return [idx for idx, val in enumerate(l) if type(val) in (int, float)]
314
+
315
+
316
def mutate(model,variables,ops=defaultOps(),const=defaultConst(),maxLength=10):
    """Return a mutated copy of model using one of 8 randomly chosen mutation
    types; the variable stack is re-balanced to match the new arity at the end."""
    newModel=copy.deepcopy(model)
    newModel[0]=np.array(newModel[0],dtype=object).tolist()
    mutationType=random.randint(0,7)
    varChoices=[variableSelect(i) for i in range(variables)]+const
    opChoice=0
    varChoice=0

    tmp=0

    if mutationType==0: #single operator mutation
        # NOTE(review): opChoice is drawn before the len>0 guard; an empty
        # operator stack would raise in randint — confirm models are never empty.
        opChoice=random.randint(0,len(newModel[0])-1)
        if len(newModel[0])>0:
            newModel[0][opChoice]=np.random.choice([i for i in ops] )

    elif mutationType==1: #single variable mutation
        varChoice=np.random.choice(varChoices)
        # Constant generators (named functions) are called; selectors (lambdas) kept.
        if callable(varChoice) and varChoice.__name__!='<lambda>':
            varChoice=varChoice()
        newModel[1][random.randint(0,len(newModel[1])-1)]=varChoice

    elif mutationType==2: #insertion mutation to top of stack
        opChoice=np.random.choice(ops)
        newModel[0]=[opChoice]+newModel[0]
        while modelArity(newModel)>len(newModel[1]):
            varChoice=np.random.choice(varChoices)
            if callable(varChoice) and varChoice.__name__!='<lambda>':
                varChoice=varChoice()
            newModel[1]=[varChoice]+newModel[1]

    elif mutationType==3: #deletion mutation from top of stack
        if len(newModel[0])>1:
            opChoice=random.randint(1,len(newModel[0])-1)
            newModel[0]=newModel[0][-opChoice:]
            newModel[1]=newModel[1][-listArity(newModel[0]):]

    elif mutationType==4: #insertion mutation to bottom of stack
        opChoice=np.random.choice([i for i in ops])
        newModel[0].append(opChoice)

    elif mutationType==5: #mutation via crossover with random model
        newModel=recombination2pt(newModel,generateRandomModel(variables,ops,const,maxLength))[0]

    elif mutationType==6: #single operator insertion mutation
        # Only unary operators (arity 1) can be inserted without changing arity.
        singleOps=[op for op in ops if getArity(op)==1 and op!='pop']
        singleOps.append('pop')
        pos=random.randint(0,len(newModel[0])-1)
        newModel[0].insert(pos,np.random.choice(singleOps))

    elif mutationType==7: #nudge numeric constant
        pos=get_numeric_indices(newModel[1])
        if(len(pos)>0): #If there are numeric constants
            pos=random.choice(pos)
            # NOTE(review): normal(-1,1) has mean -1 (a downward-biased nudge);
            # confirm that bias is intentional rather than normal(0,1).
            newModel[1][pos]=newModel[1][pos]+np.random.normal(-1,1)

    # Re-balance the variable stack to the (possibly changed) model arity.
    if modelArity(newModel)<len(newModel[1]):
        newModel[1]=newModel[1][:modelArity(newModel)]
    elif modelArity(newModel)>len(newModel[1]):
        newModel[1]=newModel[1]+[np.random.choice(varChoices) for i in range(modelArity(newModel)-len(newModel[1]))]
        newModel[1]=[varChoice() if callable(varChoice) and varChoice.__name__!='<lambda>' else varChoice for varChoice in newModel[1]]
    newModel[0]=np.array(newModel[0],dtype=object)
    return newModel

mutate.__doc__ = "mutate(model,variableCount,ops,constants,maxLength) mutates a model"
380
def paretoFront(fitValues):
    """Boolean mask of the Pareto-optimal rows of fitValues (minimization:
    a row survives unless some other row is <= it everywhere)."""
    keep = np.ones(fitValues.shape[0], dtype=bool)
    for idx, row in enumerate(fitValues):
        if not keep[idx]:
            continue
        # Among still-kept candidates, retain only those strictly better than
        # `row` in at least one objective; `row` itself is restored afterwards.
        keep[keep] = np.any(fitValues[keep] < row, axis=1)
        keep[idx] = True
    return keep
387
def paretoTournament(pop): # selects the Pareto front of a model set
    """Return, as a plain list, the subset of pop on the Pareto front of the
    stored quality metrics (model[2])."""
    quality = np.array([mod[2] for mod in pop])
    mask = paretoFront(quality)
    return (np.array(pop, dtype=object)[mask]).tolist()
390
def tournamentModelSelection(models, popSize=100, tourneySize=5):
    """Tournament selection: repeatedly sample tourneySize models and keep each
    tournament's Pareto front until at least popSize models are collected
    (the result may slightly exceed popSize).

    The original assigned an unused local `selectionSize=popSize`; removed.
    """
    selectedModels = []
    while len(selectedModels) < popSize:
        tournament = random.sample(models, tourneySize)
        winners = paretoTournament(tournament)
        selectedModels = selectedModels + winners

    return selectedModels
paretoTournament.__doc__ = "paretoTournament(models, inputData, responseData) returns the Pareto front of a model set"
400
def modelSameQ(model1, model2):
    """True when both models have identical operator stacks (elementwise) and
    identical variable stacks; lengths are compared first so the elementwise
    ndarray comparison is always well-defined."""
    if len(model1[0]) != len(model2[0]) or len(model1[1]) != len(model2[1]):
        return False
    return all(model1[0] == model2[0]) and model1[1] == model2[1]

modelSameQ.__doc__ = "modelSameQ(model1,model2) checks if model1 and model2 are the same and returns True if so, else False"
403
def deleteDuplicateModels(models): #Removes any models that are the same, does not consider simplified form
    """Drop syntactic duplicates (no algebraic simplification), keeping the
    first occurrence of each distinct model."""
    uniqueMods = [models[0]]
    for mod in models:
        if not any(modelSameQ(mod, kept) for kept in uniqueMods):
            uniqueMods.append(mod)
    return uniqueMods

deleteDuplicateModels.__doc__ = "deleteDuplicateModels(models) deletes models that have the same form without simplifying"
416
+
417
def deleteDuplicateModelsPhenotype(models): #Removes any models that are the same regarding phenotype, does not consider simplified form
    """Drop models whose symbolic (printed) form matches an earlier model's,
    keeping first occurrences; comparison is on printGPModel output."""
    uniqueMods = [printGPModel(models[0])]
    remainingMods=[printGPModel(mod) for mod in models[1:]]
    uniquePos = [0]
    currPos=1
    for mod in remainingMods:
        test=False
        for checkMod in uniqueMods:
            if mod==checkMod:
                test=True
        if not test:
            uniqueMods.append(mod)
            uniquePos.append(currPos)
        currPos+=1

    return [models[i] for i in uniquePos]
433
+
434
def removeIndeterminateModels(models): #Removes models from the population that evaluate to nonreal values
    """Drop models whose quality metrics (model[2]) contain NaN or infinities.

    Bug fix: the original tested `np.isfinite(np.isnan(i[2]))`, i.e. isfinite
    of a boolean array, which is always True — models with infinite fitness
    were never removed. The finiteness check now inspects the metrics directly.
    """
    return [i for i in models if (not any(np.isnan(i[2]))) and all(np.isfinite(i[2]))]
removeIndeterminateModels.__doc__ = "removeIndeterminateModels(models) removes models that have a fitness that results from inf or nan values"
437
def sortModels(models):
    """Sort models ascending by their quality-metric vector model[2]
    (fitness first, then complexity, via lexicographic list comparison)."""
    return sorted(models, key=lambda mod: mod[2])

sortModels.__doc__ = "sortModels(models) sorts a model population by the models' accuracies"
440
def selectModels(models, selectionSize=0.5):
    """Iteratively peel Pareto fronts off a copy of the population until at
    least `selection` models are taken; selectionSize <= 1 is a fraction of the
    population, larger values are an absolute count."""
    tMods=copy.deepcopy(models)
    # Operator stacks must be plain lists so list.remove() equality works below.
    [modelToListForm(mod) for mod in tMods]
    paretoModels=[]
    if selectionSize<=1:
        selection=selectionSize*len(models)
    else:
        selection=selectionSize

    while len(paretoModels)<selection:
        front=paretoTournament(tMods)
        paretoModels=paretoModels+front
        for i in front:
            tMods.remove(i)
    [modelRestoreForm(mod) for mod in paretoModels]
    return paretoModels
selectModels.__doc__ = "selectModels(models, selectionSize=0.5) iteratively selects the Pareto front of a model population until n or n*popSize models are selected"
457
def stackVarUsage(opStack): #Counts how many variables are used by the operator stack
    """Number of variables/constants consumed when evaluating opStack: the
    first operator uses its full arity, each later one reuses one prior result,
    and each 'pop' consumes one extra leaf."""
    needed = getArity(opStack[0])
    for op in opStack[1:]:
        needed += getArity(op) - 1
        if op == 'pop':
            needed += 1
    return needed

stackVarUsage.__doc__ = "stackVarUsage(opStack) is a helper function that determines how many variables/constants are needed by the operator stack"
465
def trimModel(mod): #Removes extra pop operators that do nothing
    """Return a copy of mod with trailing 'pop' operators (those past the point
    where all variables are consumed) removed, so later stack edits such as
    alignment append in the right place."""
    model=copy.deepcopy(mod)
    i=0
    varStack=len(mod[1])   # variables still to be consumed
    tempStack=0            # results currently on the temp stack
    varStack-=getArity(model[0][i])
    tempStack+=1
    i+=1
    # Walk operators until every variable has been consumed; i then marks the
    # first operator that can no longer do useful work with a 'pop'.
    while varStack>0:
        if model[0][i]=='pop':
            varStack-=1
            tempStack+=1
        else:

            take=getArity(model[0][i])-tempStack
            if take>0:
                varStack-=take
                tempStack=1
            else:
                tempStack-=getArity(model[0][i])-1
        i+=1
    # Keep the prefix as-is and strip 'pop's from the remainder.
    model[0]=np.array(model[0][:i].tolist()+[j for j in model[0][i:] if not j=='pop'],dtype=object)
    return model
trimModel.__doc__ = "trimModel(model) trims extra pop operators off the operator stack so that further modifications such as a model alignment aren't altered by those pop operators"
489
def alignGPModel(model, data, response): #Aligns a model
    """Fit a, b in a*f(x)+b by linear regression of response on the model's
    prediction, append the scaling to the model, and refresh its quality; the
    input model is returned unchanged when alignment is impossible."""
    prediction=evaluateGPModel(model,data)
    if (not all(np.isfinite(np.array(prediction)))) or np.all(prediction==prediction[0]):
        return model
    if np.isnan(np.array(prediction)).any() or np.isnan(np.array(response)).any() or not np.isfinite(np.array(prediction,dtype=np.float32)).all():
        return model
    try:
        align=np.round(np.polyfit(prediction,response,1,rcond=1e-16),decimals=14)
    except np.linalg.LinAlgError:
        #print("Alignment failed for: ", model, " with prediction: ", prediction, "and reference data: ", response)
        return model
    newModel=trimModel(model)
    # Append mult/add (and the fitted slope/intercept as constants) so the
    # model now computes a*f(x)+b.
    newModel[0]=np.array(newModel[0].tolist()+[mult,add],dtype=object)
    newModel[1]=newModel[1]+align.tolist()
    setModelQuality(newModel,data,response)
    return newModel
alignGPModel.__doc__ = "alignGPModel(model, input, response) aligns a model such that response-a*f(x)+b are minimized over a and b"
506
def evolve(inputData, responseData, generations=100, ops=defaultOps(), const=defaultConst(), variableNames=[], mutationRate=79, crossoverRate=11, spawnRate=10, extinction=False,extinctionRate=10,elitismRate=50,popSize=300,maxComplexity=100,align=True,initialPop=[],timeLimit=300,capTime=False,tourneySize=5,tracking=False,modelEvaluationMetrics=[fitness,stackGPModelComplexity],dataSubsample=False,samplingMethod=randomSubsample):
    """Main GP loop: evolve a population against (inputData, responseData).

    Each generation: (optionally) subsample the data, score and cull models,
    keep an elitist Pareto selection, run tournament selection, then produce
    children by crossover, mutation, and fresh random models. Returns the
    final population, scored on the full data, sorted, and (optionally)
    aligned. Rates are percentages of popSize. The mutable defaults
    (ops/const/initialPop/metrics) are shared but never mutated here.
    """
    fullInput,fullResponse=copy.deepcopy(inputData),copy.deepcopy(responseData)
    inData=copy.deepcopy(fullInput)
    resData=copy.deepcopy(fullResponse)
    variableCount=varCount(inData)
    models=initializeGPModels(variableCount,ops,const,popSize)
    models=models+initialPop
    startTime=time.perf_counter()
    bestFits=[]
    for i in range(generations):
        if capTime and time.perf_counter()-startTime>timeLimit:
            break
        if dataSubsample:
            inData,resData=samplingMethod(fullInput,fullResponse)
        for mods in models:
            setModelQuality(mods,inData,resData,modelEvaluationMetrics=modelEvaluationMetrics)
        models=removeIndeterminateModels(models)
        if tracking:
            bestFits.append(min([mods[2][0] for mods in paretoTournament(models)]))

        #paretoModels=paretoTournament(models)
        paretoModels=selectModels(models,elitismRate/100*popSize if elitismRate/100*popSize<len(models) else len(models))
        # NOTE(review): `i%extinctionRate` is truthy on every generation NOT
        # divisible by extinctionRate; an every-Nth-generation extinction would
        # be `i%extinctionRate==0` — confirm intent before changing.
        if extinction and i%extinctionRate:
            models=initializeGPModels(variableCount,ops,const,popSize)
            for mods in models:
                setModelQuality(mods,inData,resData,modelEvaluationMetrics=modelEvaluationMetrics)

        models=tournamentModelSelection(models,popSize,tourneySize)

        crossoverPairs=random.sample(models,round(crossoverRate/100*popSize))
        toMutate=random.sample(models,round(mutationRate/100*popSize))

        childModels=paretoModels

        # Pair the first half of crossoverPairs with the second half.
        for j in range(round(len(crossoverPairs)/2)-1):
            childModels=childModels+recombination2pt(crossoverPairs[j],crossoverPairs[j+round(len(crossoverPairs)/2)])

        for j in toMutate:
            childModels=childModels+[mutate(j,variableCount,ops,const)]

        childModels=childModels+initializeGPModels(variableCount,ops,const,round(spawnRate/100*popSize))

        childModels=deleteDuplicateModels(childModels)
        childModels=[model for model in childModels if stackGPModelComplexity(model)<maxComplexity]

        #for mods in childModels:
        #    setModelQuality(mods,inData,resData,modelEvaluationMetrics=modelEvaluationMetrics)
        #childModels=removeIndeterminateModels(childModels)

        # Top up with random models so the population never shrinks.
        if len(childModels)<popSize:
            childModels=childModels+initializeGPModels(variableCount,ops,const,popSize-len(childModels))

        models=copy.deepcopy(childModels)


    # Final pass: score on the FULL data, tidy, and sort.
    for mods in models:
        setModelQuality(mods,fullInput,fullResponse,modelEvaluationMetrics=modelEvaluationMetrics)
    models=[trimModel(mod) for mod in models]
    models=deleteDuplicateModels(models)
    models=removeIndeterminateModels(models)
    models=sortModels(models)
    if align:
        models=[alignGPModel(mods,fullInput,fullResponse) for mods in models]

    if tracking:
        bestFits.append(min([mods[2][0] for mods in paretoTournament(models)]))
        plt.figure()
        plt.plot(bestFits)
        plt.title("Fitness over Time")
        plt.xlabel("Generations")
        plt.ylabel("Fitness")
        plt.show()

    return models
581
+
582
+
583
def replaceFunc(stack, f1, f2):
    """Return stack with every occurrence of f1 replaced by f2."""
    return [f2 if item == f1 else item for item in stack]
585
def printGPModel(mod,inputData=symbols(["x"+str(i) for i in range(100)])): #Evaluates a model algebraically
    """Evaluate a model symbolically with sympy, returning its algebraic form.

    The default inputData (sympy symbols x0..x99) is created once at import
    time and is read-only here. Numeric primitives are swapped for their sympy
    equivalents before the stack is evaluated.
    """
    def inv1(a):
        return a**(-1)
    from sympy import tan as tan1, exp as exp1, sqrt as sqrt1, sin as sin1, cos as cos1, acos, asin, atan, tanh as tanh1, log as log1
    # Local wrappers so replaceFunc substitutes plain functions (not sympy
    # objects with differing identities).
    def sqrt2(a):
        return sqrt1(a)
    def log2(a):
        return log1(a)
    model = copy.deepcopy(mod)
    model[0] = replaceFunc(model[0],exp,exp1)
    model[0] = replaceFunc(model[0],tan,tan1)
    model[0] = replaceFunc(model[0],sqrt,sqrt2)
    model[0] = replaceFunc(model[0],inv,inv1)
    model[0] = replaceFunc(model[0],sin,sin1)
    model[0] = replaceFunc(model[0],cos,cos1)
    model[0] = replaceFunc(model[0],arccos,acos)
    model[0] = replaceFunc(model[0],arcsin,asin)
    model[0] = replaceFunc(model[0],arctan,atan)
    model[0] = replaceFunc(model[0],tanh,tanh1)
    model[0] = replaceFunc(model[0],log,log2)
    response=evModHelper(model[1],model[0],[],np.array(inputData))[2][0]
    return response
607
+
608
def ensembleSelect(models, inputData, responseData, numberOfClusters=10): #Generates a model ensemble using input data partitions
    """Cluster the records with KMeans and pick one distinct model per cluster
    (by fitness on that cluster's records), returning the ensemble."""
    data=np.transpose(inputData)
    if len(data)<numberOfClusters:
        numberOfClusters=len(data)
    clusters=KMeans(n_clusters=numberOfClusters).fit_predict(data)
    # KMeans can return fewer distinct labels than requested; re-fit if so.
    if numberOfClusters>len(set(clusters)):
        numberOfClusters=len(set(clusters))
        clusters=KMeans(n_clusters=numberOfClusters).fit_predict(data)
    dataParts=[]
    partsResponse=[]
    for i in range(numberOfClusters):
        dataParts.append([])
        partsResponse.append([])

    for i in range(len(clusters)):
        dataParts[clusters[i]].append(data[i])
        partsResponse[clusters[i]].append(responseData[i])

    # modelResiduals[m][c] = fitness of model m on cluster c.
    modelResiduals=[]

    for i in range(len(models)):
        modelResiduals.append([])
    for i in range(len(models)):
        for j in range(numberOfClusters):
            modelResiduals[i].append(fitness(models[i],np.transpose(dataParts[j]),partsResponse[j]))

    best=[]
    for i in range(numberOfClusters):
        # NOTE(review): modelResiduals[i] is the residual row of MODEL i across
        # clusters, but i iterates clusters here — selecting the best model for
        # cluster i would argsort the COLUMN [modelResiduals[m][i] for m ...].
        # As written this also raises IndexError when len(models) <
        # numberOfClusters. Confirm intent before changing.
        ordering=np.argsort(modelResiduals[i])
        j=0
        while ordering[j] in best:
            j+=1
        best.append(ordering[j])
    ensemble=[models[best[i]] for i in range(numberOfClusters)]

    return ensemble
644
def uncertainty(data, trim=0.3):
    """Differential entropy of data as an uncertainty score; 0 when the entropy
    is not finite. Samples of 4 or fewer use window_length=1 so the estimator
    is defined. The trim parameter is accepted for API compatibility but unused."""
    windowLength = 1 if len(data) <= 4 else None
    entropy = differential_entropy(data, window_length=windowLength)
    return entropy if np.isfinite(entropy) else 0
653
+
654
def evaluateModelEnsemble(ensemble, inputData):
    """Per-record spread of the ensemble's predictions, measured as the
    differential entropy across ensemble members at each record."""
    responses=[evaluateGPModel(mod, inputData) for mod in ensemble]
    if type(responses[0])==np.ndarray:
        # Transpose to records x models so each row is one record's predictions.
        responses=np.transpose(responses)
        uncertainties=[uncertainty(res,0) for res in responses]
    else:
        # Single-record input: one uncertainty over the member predictions.
        uncertainties=[uncertainty(responses,0)]

    return uncertainties
664
def relativeEnsembleUncertainty(ensemble, inputData):
    """Ensemble uncertainty per record, as a numpy array."""
    scores = evaluateModelEnsemble(ensemble, inputData)
    return np.array(scores)
667
+
668
def createUncertaintyFunc(ensemble):
    """Wrap an ensemble into an objective f(x) = -uncertainty(x), suitable for
    scipy.optimize.minimize (minimizing f maximizes uncertainty)."""
    def negativeUncertainty(x):
        return -relativeEnsembleUncertainty(ensemble, x)
    return negativeUncertainty
670
+
671
def maximizeUncertainty(ensemble,varCount,bounds=[]): #Used to select a new point of maximum uncertainty
    """Find the input point where the ensemble disagrees most (maximum
    uncertainty) by minimizing the negative-uncertainty objective.

    Bug fix: the original computed x0 from bounds BEFORE checking bounds==[],
    so an empty bounds list raised IndexError; the unbounded case now starts
    from the origin. The bounds=[] default is never mutated.
    """
    func=createUncertaintyFunc(ensemble)
    if bounds==[]:
        x0=np.zeros(varCount)
        pt=minimize(func,x0).x
    else:
        # Start from the center of the search box.
        x0=[np.mean(bounds[i]) for i in range(varCount)]
        pt=minimize(func,x0,bounds=bounds).x
    return pt
679
def extendData(data, newPoint):
    """Return data (variables x records) with newPoint appended as one more record."""
    transposed = data.T
    extended = np.concatenate((transposed, np.array([newPoint])))
    return extended.T
681
+
682
def activeLearningCheckpoint(eqNum,version,i,inputData,response,testInput,testResponse,errors,models,minerr):
    """Persist the active-learning state to the file <eqNum>/<version> with dill.

    Improvement: uses a context manager so the file handle is closed even when
    dill.dump raises (the original open/close pair leaked on error).
    """
    path=os.path.join(str(eqNum),str(version))
    with open(path,"wb+") as file:
        dill.dump([i,inputData,response,testInput,testResponse,errors,models,minerr],file)
687
def activeLearningCheckpointLoad(eqNum,version,i,inputData,response,testInput,testResponse,errors,models,minerr):
    """Load the checkpoint <eqNum>/<version> if it exists, replacing the passed
    state; when no checkpoint file is present the arguments are returned
    unchanged."""
    path=os.path.join(str(eqNum),str(version))
    try:
        with open(path,'rb') as f:
            i,inputData,response,testInput,testResponse,errors,models,minerr=dill.load(f)
    except FileNotFoundError:
        pass
    return i,inputData,response,testInput,testResponse,errors,models,minerr
695
def subSampleSpace(space):
    """Return a random sub-box of `space`: for each (low, high) dimension, a
    sorted pair of uniform draws inside the original bounds."""
    shrunk = []
    for low, high in space:
        draws = sorted(np.random.uniform(low, high) for _ in range(2))
        shrunk.append(tuple(draws))
    return tuple(shrunk)
702
+
703
def activeLearning(func, dims, ranges,rangesP,eqNum=1,version=1,iterations=100): #func should be a lamda function of form lambda data: f(data[0],data[1],...)
    """Active-learning driver: start with 3 random samples, repeatedly evolve
    models, and add the point of maximum ensemble uncertainty until a model
    fits the held-out data to <1e-14 or iterations run out.

    Returns the number of points needed (3+i) on success, -1 when a result
    file already exists or no model is found. State is checkpointed to
    <eqNum>/<version> each iteration so runs can resume.
    """
    # A result file means this (equation, version) already finished.
    try:
        with open(os.path.join(str(eqNum),str(version))+".txt",'rb') as f:
            return -1
    except FileNotFoundError:
        pass
    inputData=[]
    testInput=[]
    found=False
    for i in range(dims):
        inputData.append(np.random.uniform(ranges[i][0],ranges[i][1],3))
        testInput.append(np.random.uniform(ranges[i][0],ranges[i][1],200))
    inputData=np.array(inputData)
    testInput=np.array(testInput)
    response=func(inputData)
    testResponse=func(testInput)
    errors=[]
    models=[]
    minerr=1
    for i in range(iterations):
        print("input: ",inputData)
        print("\n response: ",response)
        # Resuming: the checkpoint (if any) overrides all local state incl. i.
        i,inputData,response,testInput,testResponse,errors,models,minerr=activeLearningCheckpointLoad(eqNum,version,i,inputData,response,testInput,testResponse,errors,models,minerr)
        if i>iterations-1:
            break
        i+=1
        # Four independent restarts seeded with the surviving models.
        models1=evolve(inputData,response,initialPop=models,generations=1000,tracking=False,popSize=300,ops=allOps(),timeLimit=120,capTime=True,align=False,elitismRate=10)
        models2=evolve(inputData,response,initialPop=models,generations=1000,tracking=False,popSize=300,ops=allOps(),timeLimit=120,capTime=True,align=False,elitismRate=10)
        models3=evolve(inputData,response,initialPop=models,generations=1000,tracking=False,popSize=300,ops=allOps(),timeLimit=120,capTime=True,align=False,elitismRate=10)
        models4=evolve(inputData,response,initialPop=models,generations=1000,tracking=False,popSize=300,ops=allOps(),timeLimit=120,capTime=True,align=False,elitismRate=10)
        models=models1+models2+models3+models4
        models=selectModels(models,20)
        alignedModels=[alignGPModel(mods,inputData,response) for mods in models]
        ensemble=ensembleSelect(alignedModels,inputData,response)
        out=maximizeUncertainty(ensemble,dims,rangesP)
        # Avoid re-sampling an existing point by searching random sub-boxes.
        while out in inputData.T:
            out=maximizeUncertainty(ensemble,dims,subSampleSpace(rangesP))
        inputData=extendData(inputData,out)
        response=func(inputData)
        fitList=np.array([fitness(mod,testInput,testResponse) for mod in alignedModels])
        errors.append(min(fitList[np.logical_not(np.isnan(fitList))]))
        minerr=errors[-1]
        if minerr<1e-14:
            #print("Points needed in round", j,": ",3+i, " Time needed: ", time.perf_counter()-roundTime)
            if not os.path.exists(str(eqNum)):
                os.makedirs(str(eqNum))
            path=os.path.join(str(eqNum),str(version))
            file=open(path,"wb+")
            dill.dump([i,inputData,response,testInput,testResponse,errors,models,minerr],file)
            file.close()
            file=open(path+'.txt','w+')
            file.write(str(i+3)+'\n')
            file.write(str(errors))
            file.close()
            return 3+i
            # NOTE(review): the three lines below are unreachable (they follow
            # a return), and `ptsNeeded` is not defined anywhere in this module
            # — it would raise NameError if ever reached. Dead code to remove.
            found=True
            ptsNeeded.append(3+i)
            break
        activeLearningCheckpoint(eqNum,version,i,inputData,response,testInput,testResponse,errors,models,minerr)
    if found==False:
        #print("Points needed in round",j,": NA (model not found)")
        path=os.path.join(str(eqNum),str(version))
        file=open(path,"wb")
        dill.dump([-1,inputData,response,testInput,testResponse,errors,models,minerr],file)
        file.close()
        file=open(path+'.txt',"w+")
        file.write(str(i+3)+"\n")
        file.write(str(errors))
        file.close()
        return -1
773
+
774
def plotModels(models):
    """Scatter-plot a model population in complexity/accuracy space.

    Pareto-front models are drawn in red, the rest in blue. Hovering over a
    point shows the printed form of that model in a tooltip (interactive
    backends only). Blocks on plt.show().
    """
    # Work on a deep copy so the Pareto tournament cannot mutate the caller's models.
    tMods=copy.deepcopy(models)
    [modelToListForm(mod) for mod in tMods]
    paretoModels=paretoTournament(tMods)
    # Remove Pareto winners so tMods holds only the non-Pareto remainder.
    for i in paretoModels:
        tMods.remove(i)
    [modelRestoreForm(mod) for mod in paretoModels]
    [modelRestoreForm(mod) for mod in tMods]

    # mod[2] holds the fitness pair: [0] accuracy (1-R**2), [1] complexity.
    pAccuracies=[mod[2][0] for mod in paretoModels]
    pComplexities=[mod[2][1] for mod in paretoModels]

    # Non-Pareto points first, then Pareto points — colors follow the same order.
    accuracies=[mod[2][0] for mod in tMods]+pAccuracies
    complexities=[mod[2][1] for mod in tMods]+pComplexities
    colors=['blue' for i in range(len(tMods))]+['red' for i in range(len(pAccuracies))]

    fig,ax = plt.subplots()

    sc=plt.scatter(complexities,accuracies,color=colors)
    plt.xlabel("Complexity")
    plt.ylabel("1-R**2")
    # names[] is index-aligned with the scatter offsets for the hover tooltip.
    names=[str(printGPModel(mod)) for mod in tMods]+[str(printGPModel(mod)) for mod in paretoModels]

    # Hidden annotation reused as the hover tooltip.
    label = ax.annotate("", xy=(0,0), xytext=(np.min(complexities),np.mean([np.max(accuracies),np.min(accuracies)])),
        bbox=dict(boxstyle="round", fc="w"),
        arrowprops=dict(arrowstyle="->"))
    label.set_visible(False)

    def update_labels(ind):
        # Move the tooltip to the hovered point and show that model's text.
        pos = sc.get_offsets()[ind["ind"][0]]
        label.xy = pos
        text = "{}".format(" ".join([names[n] for n in [ind["ind"][0]]]))
        label.set_text(text)
        label.get_bbox_patch().set_facecolor('grey')
        label.get_bbox_patch().set_alpha(0.9)


    def hover(event):
        # Show the tooltip while the cursor is over a point, hide it otherwise.
        vis = label.get_visible()
        if event.inaxes == ax:
            cont, ind = sc.contains(event)
            if cont:
                update_labels(ind)
                label.set_visible(True)
                fig.canvas.draw_idle()
            else:
                if vis:
                    label.set_visible(False)
                    fig.canvas.draw_idle()

    fig.canvas.mpl_connect("motion_notify_event", hover)

    plt.show()
828
+
829
def plotModelResponseComparison(model, inputData, response, sort=False):
    """Plot the true response and the model prediction against data index.

    With sort=True the points are ordered by ascending true response value
    (the original accepted this flag but ignored it); the default False
    preserves the original data order. Blocks on plt.show().
    """
    predictions = evaluateGPModel(model, inputData)
    if sort:
        # Reorder both series by the true response so trends are visible.
        order = np.argsort(response)
        response = np.asarray(response)[order]
        predictions = np.asarray(predictions)[order]
    plt.scatter(range(len(response)), response, label="True Response")
    plt.scatter(range(len(response)), predictions, label="Model Prediction")
    plt.legend()
    plt.xlabel("Data Index")
    plt.ylabel("Response Value")
    plt.show()
836
def plotPredictionResponseCorrelation(model, inputData, response):
    """Scatter predicted vs. true response with a perfect-correlation line."""
    predicted = evaluateGPModel(model, inputData)
    plt.scatter(response, predicted, label="Model")
    # y = x reference: a perfect model would place every point on this line.
    plt.plot(response, response, label="Perfect Correlation", color='green')
    plt.xlabel("True Response")
    plt.ylabel("Predicted Response")
    plt.legend()
    plt.show()
843
+ #Plot model complexity distribution
844
def plotModelComplexityDistribution(models):
    """Histogram model complexities, split into Pareto and non-Pareto sets."""
    # Deep-copy so the tournament's in-place form changes don't leak out.
    pool = copy.deepcopy(models)
    for mod in pool:
        modelToListForm(mod)
    front = paretoTournament(pool)
    # Separate the Pareto winners from the remainder of the population.
    for winner in front:
        pool.remove(winner)
    for mod in front:
        modelRestoreForm(mod)
    for mod in pool:
        modelRestoreForm(mod)
    # mod[2][1] is the complexity component of the fitness pair.
    plt.hist([mod[2][1] for mod in pool], label="Non-Pareto Models")
    plt.hist([mod[2][1] for mod in front], label="Pareto Models")
    plt.xlabel("Model Complexity")
    plt.ylabel("Frequency")
    plt.legend()
    plt.show()
860
+ #Plot model accuracy distribution
861
def plotModelAccuracyDistribution(models):
    """Histogram model accuracies, split into Pareto and non-Pareto sets."""
    # Deep-copy so the tournament's in-place form changes don't leak out.
    pool = copy.deepcopy(models)
    for mod in pool:
        modelToListForm(mod)
    front = paretoTournament(pool)
    # Separate the Pareto winners from the remainder of the population.
    for winner in front:
        pool.remove(winner)
    for mod in front:
        modelRestoreForm(mod)
    for mod in pool:
        modelRestoreForm(mod)
    # mod[2][0] is the accuracy component of the fitness pair.
    plt.hist([mod[2][0] for mod in pool], label="Non-Pareto Models")
    plt.hist([mod[2][0] for mod in front], label="Pareto Models")
    plt.xlabel("Model Accuracy")
    plt.ylabel("Frequency")
    plt.legend()
    plt.show()
877
+ #Plot model residuals relative to response
878
def plotModelResiduals(model, input, response):
    """Scatter residuals (prediction minus truth) against the true response."""
    residuals = evaluateGPModel(model, input) - response
    plt.scatter(response, residuals)
    plt.xlabel("Response")
    plt.ylabel("Residual")
    plt.show()
883
+ #Plot model residual distribution
884
def plotModelResidualDistribution(model, input, response):
    """Histogram the model's residuals (prediction minus truth)."""
    residuals = evaluateGPModel(model, input) - response
    plt.hist(residuals)
    plt.xlabel("Residual")
    plt.ylabel("Frequency")
    plt.show()
889
+ #Plot the presence of variables in a model population
890
def plotVariablePresence(models, variables=["x" + str(i) for i in range(100)], sort=False):
    """Bar-chart how often each variable appears across a model population.

    Numeric constants inside the models are ignored, and variables that never
    occur are dropped from the chart. With sort=True the bars are ordered by
    descending frequency. Blocks on plt.show().
    """
    perModel = [varReplace(model[1], variables) for model in models]
    # Flatten and keep only variable symbols, discarding numeric constants.
    occurrences = [entry for stack in perModel for entry in stack
                   if type(entry) != int and type(entry) != float]
    counts = [occurrences.count(name) for name in variables]
    # Keep only variables that actually appear at least once.
    used = [variables[k] for k in range(len(counts)) if counts[k] > 0]
    freqs = [c for c in counts if c > 0]
    if sort:
        order = np.argsort(freqs)[::-1]
        used = [used[k] for k in order]
        freqs = [freqs[k] for k in order]
    plt.bar(used, freqs)
    plt.xlabel("Variable")
    plt.ylabel("Frequency")
    plt.show()
910
def replaceOpsWithStrings(opStack):
    """Return a copy of opStack with operator functions replaced by symbols.

    The original stack is not modified; each known operator function is
    swapped for a short display string (e.g. add -> "+", sqrd -> "^2").
    """
    # (function, display string) pairs, applied in this fixed order.
    substitutions = [
        (exp, "exp"), (tan, "tan"), (sqrt, "sqrt"), (inv, "1/#"),
        (sin, "sin"), (cos, "cos"), (arccos, "acos"), (arcsin, "asin"),
        (arctan, "atan"), (tanh, "tanh"), (log, "log"), (add, "+"),
        (mult, "*"), (sub, "-"), (protectDiv, "/"), (sqrd, "^2"),
    ]
    model = copy.deepcopy(opStack)
    for op, symbol in substitutions:
        model = replaceFunc(model, op, symbol)
    return model
929
+ #Plot the presence of operators in a model population
930
def plotOperatorPresence(models, sort=False, excludePop=True):
    """Bar-chart operator frequencies across a model population.

    Operators are rendered as display strings via replaceOpsWithStrings.
    With excludePop=True the stack-bookkeeping 'pop' token is omitted; with
    sort=True bars are ordered by descending frequency. Blocks on plt.show().
    """
    ops = [replaceOpsWithStrings(model[0]) for model in models]
    # Merge the per-model operator stacks into one flat list.
    ops = [j for i in ops for j in i]
    uniqueOps = list(set(ops))
    # Bug fix: the original called uniqueOps.remove('pop') unconditionally,
    # raising ValueError whenever no model contained the 'pop' token.
    if excludePop and 'pop' in uniqueOps:
        uniqueOps.remove('pop')
    # Count frequency of each operator in ops.
    opFreqs = [ops.count(i) for i in uniqueOps]
    # Keep only operators that appear at least once.
    opsUsed = [str(uniqueOps[i]) for i in range(len(opFreqs)) if opFreqs[i] > 0]
    opFreqs = [opFreqs[i] for i in range(len(opFreqs)) if opFreqs[i] > 0]
    if sort:
        order = np.argsort(opFreqs)[::-1]
        opsUsed = [opsUsed[i] for i in order]
        opFreqs = [opFreqs[i] for i in order]
    plt.bar(opsUsed, opFreqs)
    plt.xticks(rotation=0)
    plt.xlabel("Operator")
    plt.ylabel("Frequency")
    plt.show()
955
+
956
+ ############################
957
+ #Sharpness Computations
958
+ ############################
959
+
960
def sharpnessConstants(model, inputData, responseData, numPerturbations=10, percentPerturbation=0.2):
    """Std-dev of model fitness under random perturbation of its constants.

    Each numeric parameter in model[1] is scaled by a random factor drawn
    from 1 +/- percentPerturbation/2; callable entries are left untouched.
    """
    fitnessSamples = []
    for _ in range(numPerturbations):
        perturbed = copy.deepcopy(model)
        perturbed[1] = [
            p if callable(p) else p * (1 + percentPerturbation * (np.random.uniform() - 0.5))
            for p in model[1]
        ]
        fitnessSamples.append(fitness(perturbed, inputData, responseData))
    return np.std(fitnessSamples)
971
+
972
def sharpnessData(model, inputData, responseData, numPerturbations=10, percentPerturbation=0.2, preserveSign=False):
    """Std-dev of model fitness under random perturbation of the input data.

    Each input variable is jittered by up to +/- half of percentPerturbation
    times its standard deviation. With preserveSign=True, any variable whose
    samples all share one sign keeps that sign after perturbation.
    """
    fitnessSamples = []
    for _ in range(numPerturbations):
        jittered = np.array([
            vec + percentPerturbation * np.std(vec) * (np.random.uniform(size=len(vec)) - 0.5)
            for vec in copy.deepcopy(inputData)
        ])
        if preserveSign:
            # A single unique sign per variable means all samples agree;
            # reimpose it after the perturbation may have flipped some values.
            signs = [np.unique(var) for var in np.sign(inputData)]
            jittered = [signs[k] * abs(jittered[k]) if len(signs[k]) == 1 else jittered[k]
                        for k in range(len(signs))]
        fitnessSamples.append(fitness(model, jittered, responseData))
    return np.std(fitnessSamples)
985
+
986
def totalSharpness(model, inputData, responseData, numPerturbations=10, percentPerturbation=0.2, preserveSign=False):
    """Combined sharpness: constant-perturbation plus data-perturbation terms."""
    constantTerm = sharpnessConstants(model, inputData, responseData,
                                      numPerturbations=numPerturbations,
                                      percentPerturbation=percentPerturbation)
    dataTerm = sharpnessData(model, inputData, responseData,
                             numPerturbations=numPerturbations,
                             percentPerturbation=percentPerturbation,
                             preserveSign=preserveSign)
    return constantTerm + dataTerm
989
+
990
+ ############################
991
+ #Multiple Independent Searches
992
+ ############################
993
def runEpochs(x, y, epochs=5, **kwargs):
    """Run several independent evolve() searches and pool the sorted results.

    Extra keyword arguments are forwarded unchanged to each evolve() call.
    """
    pooled = [mod for _ in range(epochs) for mod in evolve(x, y, **kwargs)]
    return sortModels(pooled)
StackGP/__init__.py ADDED
File without changes
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2023 Nathan Haut
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,24 @@
1
+ Metadata-Version: 2.1
2
+ Name: StackGP
3
+ Version: 0.0.1
4
+ Summary: A stack-based genetic programming system
5
+ Author-email: Nathan Haut <hautnath@msu.edu>
6
+ Project-URL: Homepage, https://github.com/hoolagans/StackGP
7
+ Project-URL: Issues, https://github.com/hoolagans/StackGP/issues
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Operating System :: OS Independent
11
+ Requires-Python: >=3.8
12
+ Description-Content-Type: text/markdown
13
+ License-File: LICENSE
14
+
15
+ # StackGP
16
+ A stack-based genetic programming system in Python
17
+
18
+
19
+
20
+ ## Publications using StackGP:
21
+
22
+ 1. Active Learning Improves Performance on Symbolic Regression Tasks in StackGP https://dl.acm.org/doi/10.1145/3520304.3528941
23
+ 2. Correlation Versus RMSE Loss Functions in Symbolic Regression Tasks https://link.springer.com/chapter/10.1007/978-981-19-8460-0_2
24
+ 3. Active Learning Informs Symbolic Regression Model Development in Genetic Programming https://doi.org/10.1145/3583133.3590577
@@ -0,0 +1,7 @@
1
+ StackGP/StackGP.py,sha256=8ZfawyMagpl-SpG8Ko4AY9MjJx5OuxSqnaJuCUYONG0,42145
2
+ StackGP/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
+ StackGP-0.0.1.dist-info/LICENSE,sha256=oP5zZM8kaqnHjiDmyzI6FBUMaE3U6Ay2EBlQW8P5AQE,1068
4
+ StackGP-0.0.1.dist-info/METADATA,sha256=3S6Fz7zBR_Pz3T6Hnyq9Ttu-PknuwdFr6NJ-pqLuC7E,991
5
+ StackGP-0.0.1.dist-info/WHEEL,sha256=-oYQCr74JF3a37z2nRlQays_SX2MqOANoqVjBBAP2yE,91
6
+ StackGP-0.0.1.dist-info/top_level.txt,sha256=ZMnq1T0y1D49ZgYmvTDZDUx7lp_AInDPCgDJwODya-4,8
7
+ StackGP-0.0.1.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (71.0.3)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1 @@
1
+ StackGP