StackGP 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- StackGP/StackGP.py +998 -0
- StackGP/__init__.py +0 -0
- StackGP-0.0.1.dist-info/LICENSE +21 -0
- StackGP-0.0.1.dist-info/METADATA +24 -0
- StackGP-0.0.1.dist-info/RECORD +7 -0
- StackGP-0.0.1.dist-info/WHEEL +5 -0
- StackGP-0.0.1.dist-info/top_level.txt +1 -0
StackGP/StackGP.py
ADDED
|
@@ -0,0 +1,998 @@
|
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
# coding: utf-8
|
|
3
|
+
|
|
4
|
+
import random
|
|
5
|
+
import matplotlib.pyplot as plt
|
|
6
|
+
import numpy as np
|
|
7
|
+
import math
|
|
8
|
+
import copy
|
|
9
|
+
import sys
|
|
10
|
+
from scipy.stats import pearsonr # for computing correlation
|
|
11
|
+
from functools import reduce #for flattening lists
|
|
12
|
+
from operator import concat #for flattening lists
|
|
13
|
+
from scipy.stats import trim_mean # for ensemble evaluation
|
|
14
|
+
from scipy.stats import differential_entropy
|
|
15
|
+
import warnings
|
|
16
|
+
import time
|
|
17
|
+
import dill
|
|
18
|
+
import os
|
|
19
|
+
from sklearn.cluster import KMeans #for clustering in ensemble definition
|
|
20
|
+
from scipy.optimize import minimize #for uncertainty maximization
|
|
21
|
+
from sympy import symbols
|
|
22
|
+
warnings.filterwarnings('ignore', '.*invalid value.*' )
|
|
23
|
+
warnings.filterwarnings('ignore', '.*overflow.*' )
|
|
24
|
+
warnings.filterwarnings('ignore', '.*divide by.*' )
|
|
25
|
+
warnings.filterwarnings('ignore', '.*is constant.*' )
|
|
26
|
+
warnings.filterwarnings('ignore', '.*nearly constant.*' )
|
|
27
|
+
warnings.filterwarnings('ignore', '.*Polyfit may be.*' )
|
|
28
|
+
warnings.filterwarnings('ignore', '.*Number of.*')
|
|
29
|
+
def protectDiv(a, b):
    """Division that yields NaN where the divisor is zero instead of raising.

    Handles scalar divisors (int/float/np.float64) and ndarray divisors
    containing zeros; anything else falls through to plain division.
    """
    scalarTypes = (int, float, np.float64)
    if type(b) in scalarTypes and b == 0:
        return a / math.nan
    if type(b) == np.ndarray and (0 in b):
        safeDivisor = np.where(b == 0, math.nan, b)
        return a / safeDivisor
    return a / b
|
|
36
|
+
def add(a, b):
    """Sum of a and b (works for scalars and numpy arrays)."""
    total = a + b
    return total
|
|
38
|
+
def sub(a, b):
    """Difference a - b (works for scalars and numpy arrays)."""
    difference = a - b
    return difference
|
|
40
|
+
def mult(a, b):
    """Product a * b (works for scalars and numpy arrays)."""
    product = a * b
    return product
|
|
42
|
+
def exp(a):
    """Elementwise e**a via numpy."""
    result = np.exp(a)
    return result
|
|
44
|
+
# def sine(a,b):
|
|
45
|
+
# return np.sin(a)
|
|
46
|
+
def power(a, b):
    """a raised to the b-th power."""
    result = a ** b
    return result
|
|
48
|
+
def sqrt(a):
    """Elementwise square root via numpy (NaN for negatives, with warning suppressed at module level)."""
    result = np.sqrt(a)
    return result
|
|
50
|
+
def sqrd(a):
    """Square of a."""
    result = a ** 2
    return result
|
|
52
|
+
def inv(a):
    """Elementwise reciprocal; input is coerced to a float ndarray first so 1/0 -> inf rather than raising."""
    asFloats = np.array(a).astype(float)
    return asFloats ** (-1)
|
|
54
|
+
def sin(a):
    """Elementwise sine via numpy."""
    result = np.sin(a)
    return result
|
|
56
|
+
def cos(a):
    """Elementwise cosine via numpy."""
    result = np.cos(a)
    return result
|
|
58
|
+
def tan(a):
    """Elementwise tangent via numpy."""
    result = np.tan(a)
    return result
|
|
60
|
+
def arccos(a):
    """Elementwise inverse cosine via numpy (NaN outside [-1, 1])."""
    result = np.arccos(a)
    return result
|
|
62
|
+
def arcsin(a):
    """Elementwise inverse sine via numpy (NaN outside [-1, 1])."""
    result = np.arcsin(a)
    return result
|
|
64
|
+
def arctan(a):
    """Elementwise inverse tangent via numpy."""
    result = np.arctan(a)
    return result
|
|
66
|
+
def tanh(a):
    """Elementwise hyperbolic tangent via numpy."""
    result = np.tanh(a)
    return result
|
|
68
|
+
def log(a):
    """Elementwise natural logarithm via numpy (NaN/-inf for non-positive input, warnings suppressed)."""
    result = np.log(a)
    return result
|
|
70
|
+
|
|
71
|
+
def defaultOps():
    """Default operator pool; the repeated "pop" entries weight stack pops
    more heavily when operators are sampled uniformly."""
    pool = [protectDiv, add, sub, mult, exp, sqrd, sqrt, inv]
    pool += ["pop"] * 6
    return pool
|
|
73
|
+
def allOps():
    """Extended operator pool including trigonometric/log operators;
    repeated "pop" entries weight stack pops when sampled uniformly."""
    pool = [protectDiv, add, sub, mult, exp, sqrd, sqrt, inv,
            cos, sin, tan, arccos, arcsin, arctan, tanh, log]
    pool += ["pop"] * 10
    return pool
|
|
75
|
+
def randomInt(a=-3, b=3):
    """Uniform random integer in the closed range [a, b]."""
    low, high = a, b
    return random.randint(low, high)
|
|
77
|
+
def defaultConst():
    """Default constant pool: pi, e, and two random-number generator functions
    (generators are invoked later wherever a constant is actually placed)."""
    return [np.pi, np.e, randomInt, ranReal]
|
|
79
|
+
def ranReal(a=20, b=-10):
    """Random real computed as random()*a - b.

    NOTE(review): with the defaults (a=20, b=-10) this yields values in
    [10, 30); `random()*a + b` giving [-10, 10) may have been intended —
    confirm before changing.
    """
    draw = random.random()
    return draw * a - b
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
############################
|
|
84
|
+
#Data Subsampling Methods
|
|
85
|
+
############################
|
|
86
|
+
def randomSubsample(x, y):
    """Random subsample of roughly len(y)**(3/5) records (minimum 3).

    x has shape (variables, records); the same record indices are taken
    from every variable row and from y.
    """
    sampleSize = max(int(np.ceil(len(y) ** (3 / 5))), 3)
    picks = np.random.choice(range(x.shape[1]), sampleSize, replace=False)
    sampledX = np.array([row[picks] for row in x])
    return sampledX, y[picks]
|
|
90
|
+
|
|
91
|
+
def generationProportionalSample(x, y, generation=100, generations=100):
    """Subsample whose size grows with generation/generations (minimum 3).

    At generation == generations the full record count is used; earlier
    generations see proportionally (power 3/5) fewer records.
    """
    fraction = (generation / generations) ** (3 / 5)
    sampleSize = max(int(np.ceil(len(y) * fraction)), 3)
    picks = np.random.choice(range(x.shape[1]), sampleSize, replace=False)
    sampledX = np.array([row[picks] for row in x])
    return sampledX, y[picks]
|
|
95
|
+
|
|
96
|
+
import inspect
|
|
97
|
+
def getArity(func):
    """Arity of an operator; the pseudo-operator "pop" counts as arity 1."""
    if func == "pop":
        return 1
    signature = inspect.signature(func)
    return len(signature.parameters)

getArity.__doc__ = "getArity(func) takes a function and returns the function arity"
|
|
103
|
+
def modelArity(model):
    """Total number of variable/constant slots consumed by the model's
    operator stack (model[0])."""
    extraSlots = sum(getArity(op) - 1 for op in model[0])
    return 1 + extraSlots

modelArity.__doc__ = "modelArity(model) returns the total arity of a model"
|
|
107
|
+
def listArity(data):
    """Arity of evaluating a list of operators; empty list has arity 0."""
    if len(data) == 0:
        return 0
    return 1 + sum(getArity(op) - 1 for op in data)

listArity.__doc__ = "listArity(list) returns the arity of evaluating a list of operators"
|
|
112
|
+
def buildEmptyModel():
    """Fresh empty model structure: [operatorStack, varConstStack, qualityMetrics]."""
    return [[] for _ in range(3)]

buildEmptyModel.__doc__ = "buildEmptyModel() takes no inputs and generates an empty GP model"
|
|
115
|
+
def variableSelect(num):
    """Build a selector function returning the num-th variable.

    NOTE: this must stay a lambda — other code distinguishes variable
    selectors from constant generators by checking __name__ == '<lambda>'.
    """
    selector = lambda variables: variables[num]
    return selector

variableSelect.__doc__ = "variableSelect(n) is a function that creates a function to select the nth variable"
|
|
118
|
+
def modelToListForm(model):
    """In place: convert the operator stack (model[0]) from ndarray to a plain list."""
    model[0] = model[0].tolist()
|
|
120
|
+
def modelRestoreForm(model):
    """In place: convert the operator stack (model[0]) back to an object ndarray."""
    model[0] = np.array(model[0], dtype=object)
|
|
122
|
+
|
|
123
|
+
def generateRandomModel(variables,ops,const,maxLength): #Generates a random GP model
    """Build one random model: random operator stack of length 1..maxLength,
    then exactly enough random variables/constants to satisfy its arity."""
    prog = buildEmptyModel() #Generate an empty model with correct structure
    # Pool of choices: one selector lambda per input variable, plus constants/generators.
    varChoices=[variableSelect(i) for i in range(variables)]+const #All variable and constants choices
    prog[0]=np.array(np.random.choice(ops,random.randint(1,maxLength)),dtype=object) #Choose random operators
    countVars=modelArity(prog) #Count how many variables/constants are needed
    prog[1]=np.random.choice(varChoices,countVars) #Choose random variables/constants
    # Constant *generators* (named callables) are invoked now; variable selectors
    # are lambdas and are deliberately kept as-is (identified by __name__=='<lambda>').
    prog[1]=[i() if (callable(i) and i.__name__!='<lambda>' )else i for i in prog[1]] #If function then evaluate
    return prog
generateRandomModel.__doc__ = "generateRandomModel() takes as input the variables, operators, constants, and max program length and returns a random program"
|
|
132
|
+
def initializeGPModels(variables,ops=defaultOps(),const=defaultConst(),numberOfModels=100,maxLength=10):
    """Generate a population of random stack-GP models.

    Each model is [operatorStack, varConstStack, qualityMetrics].
    Fix: removed the unused local `prog` the original built and discarded.
    """
    models=[generateRandomModel(variables,ops,const,maxLength) for i in range(numberOfModels)] #Generate models
    return models
initializeGPModels.__doc__ = "initializeGPModels(countOfVariables, operators, constants, numberOfModels=100, maxLength=10) returns a set of randomly generated models"
|
|
140
|
+
|
|
141
|
+
def reverseList(data):
    """Return the elements of data in reverse order, always as a list."""
    return list(reversed(data))

reverseList.__doc__ = "reverseList(data) returns the data list reversed"
|
|
144
|
+
def varReplace(data, variables):
    """Replace callable entries (variable selector lambdas) with their value
    on `variables`; non-callables pass through unchanged."""
    resolved = []
    for entry in data:
        resolved.append(entry(variables) if callable(entry) else entry)
    return resolved

varReplace.__doc__ = "varReplace(data,variables) replaces references to variables in data with actual values"
|
|
147
|
+
def inputLen(data):
    """Number of data records: length of the first variable's value sequence,
    or 1 when the first entry is a scalar."""
    first = data[0]
    if type(first) in (list, np.ndarray):
        return len(first)
    return 1

inputLen.__doc__ = "inputLen(data) determines the number of data records in a data set"
|
|
154
|
+
def varCount(data):
    """Number of variables (rows) in a data set."""
    return len(data)

varCount.__doc__ = "varCount(data) determines the number of variables in a data set"
|
|
157
|
+
def evaluateGPModel(model,inputData): #Evaluates a model numerically
    """Numerically evaluate a model on inputData (shape: variables x records).

    Delegates to the recursive stack evaluator and takes the top of the
    resulting temp stack; a scalar result is broadcast to one value per record.
    """
    response=evModHelper(model[1],model[0],[],np.array(inputData).astype(float))[2][0]
    # Broadcast a constant result so callers always get one value per record.
    if not type(response)==np.ndarray and inputLen(inputData)>1:
        response=np.array([response for i in range(inputLen(inputData))])
    return response
evaluateGPModel.__doc__ = "evaluateGPModel(model,data) numerically evaluates a model using the data stored in inputData"
|
|
163
|
+
def evModHelper(varStack,opStack,tempStack,data): #Recursive helper function for evaluateGPModel
    """Recursively consume the operator stack, pulling operands from the
    variable stack onto the temp stack and applying operators in place.

    Returns [varStack, opStack, tempStack]; note the base case returns the
    stacks in the order [tempStack, opStack, varStack] — callers read index
    [2][0] as the final result either way once recursion unwinds.
    """
    stack1=varStack
    stack2=opStack
    stack3=tempStack

    if len(stack2)==0:
        return [stack3,stack2,stack1]
    op=stack2[0]
    stack2=stack2[1:]

    if callable(op):

        patt=getArity(op)
        # Refill the temp stack from the variable stack until the operator
        # has enough operands.
        while patt>len(stack3):
            stack3=[stack1[0]]+stack3
            stack1=stack1[1:]
        try:
            # Operands are reversed so the oldest pushed value is the first argument.
            temp=op(*varReplace(reverseList(stack3[:patt]),data))
        except TypeError:
            print("stack3: ", stack3, " patt: ", patt, " data: ", data)
            temp=np.nan
        except OverflowError:
            temp=np.nan
        stack3=stack3[patt:]
        stack3=[temp]+stack3

    else:
        # "pop": move one value from the variable stack onto the temp stack.
        if len(stack1)>0:
            stack3=varReplace([stack1[0]],data)+stack3
            stack1=stack1[1:]
    if len(stack2)>0:
        stack1,stack2,stack3=evModHelper(stack1,stack2,stack3,data)

    return [stack1,stack2,stack3]

evModHelper.__doc__ = "evModHelper(varStack,opStack,tempStack,data) is a helper function for evaluateGPModel"
|
|
198
|
+
def fitness(prog,data,response): # Fitness function using correlation
    """Model fitness as 1 - R^2 (squared Pearson correlation) between the
    model's predictions and `response`; lower is better.

    Returns np.nan for non-finite or constant predictions (caller filters
    these out) and 1 when the correlation itself is undefined.
    """
    predicted=evaluateGPModel(prog,np.array(data))
    if type(predicted)!=list and type(predicted)!=np.ndarray:
        predicted=np.array([predicted for i in range(inputLen(data))])
    try:
        if np.isnan(predicted).any() or np.isinf(predicted).any():
            return np.nan
    except TypeError:
        # Non-numeric prediction (e.g. object dtype) — treat as indeterminate.
        #print(predicted)
        return np.nan
    except OverflowError:
        return np.nan
    # float32 check also rejects magnitudes that overflow single precision;
    # constant predictions have no defined correlation.
    if (not all(np.isfinite(np.array(predicted,dtype=np.float32)))) or np.all(predicted==predicted[0]):
        return np.nan
    try:
        fit=1-pearsonr(predicted,np.array(response))[0]**2 # 1-R^2
    except ValueError:
        return 1
    if math.isnan(fit):
        return 1 # If nan return 1 as fitness
    return fit # Else return actual fitness 1-R^2
fitness.__doc__ = "fitness(program,data,response) returns the 1-R^2 value of a model"
|
|
220
|
+
def stackGPModelComplexity(model, *args):
    """Model complexity: operator count plus variable/constant count,
    excluding "pop" pseudo-operators. Extra positional args are ignored so
    this can be used as a quality metric alongside fitness(model, data, response)."""
    opList = model[0].tolist()
    return len(opList) + len(model[1]) - opList.count("pop")

stackGPModelComplexity.__doc__ = "stackGPModelComplexity(model) returns the complexity of the model"
|
|
223
|
+
def setModelQuality(model,inputData,response,modelEvaluationMetrics=[fitness,stackGPModelComplexity]):
    """In place: fill model[2] with every metric evaluated on (inputData, response).

    NOTE: the mutable default list is shared across calls but only read,
    never mutated, so this is safe."""
    model[2] = [metric(model, inputData, response) for metric in modelEvaluationMetrics]

setModelQuality.__doc__ = "setModelQuality(model, inputdata, response, metrics=[r2,size]) is an inplace operator that sets a models quality"
|
|
227
|
+
def stackPass(model,pt):
    """Simulate evaluating the first `pt` operators of model[0] to find how
    many variables (p) have been consumed and the temp-stack depth (t).

    Returns [stack1, stack2]: the unconsumed variables and the (reversed)
    consumed-variable prefix trimmed to the live temp-stack depth.
    """
    i=0
    t=0
    p=0
    s=model[0]
    # The first executed operator always leaves one value on the temp stack.
    if i <pt:
        t+=1
    while i<pt:
        if s[i]=="pop":
            t+=1
            p+=1
        else:
            # An operator pulls from the variable stack only what the temp
            # stack cannot supply.
            p+=max(0,getArity(s[i])-t)
            t=max(1,t-getArity(s[i])+1)
        i+=1
    stack1=model[1][p:]
    stack2=reverseList(model[1][:p])[:t+1]
    return [stack1,stack2]
|
|
245
|
+
def stackGrab(stack1, stack2, num):
    """Take `num` items, preferring stack2 and spilling over into stack1.

    Returns [taken, remainingStack1, remainingStack2]; the remainders are
    deep copies so the caller's inputs are never aliased.
    """
    if len(stack2) < num:
        spill = num - len(stack2)
        taken = stack2 + stack1[:spill]
        remaining1 = copy.deepcopy(stack1)[spill:]
        remaining2 = []
    else:
        taken = stack2[:num]
        remaining1 = copy.deepcopy(stack1)
        remaining2 = copy.deepcopy(stack2)[num:]
    return [taken, remaining1, remaining2]
|
|
257
|
+
def fragmentVariables(model,pts):
    """Collect the variables/constants consumed by the operator fragment
    model[0][pts[0]..pts[1]] (inclusive), in consumption order."""
    stack1,stack2=stackPass(model,pts[0])
    opStack=model[0]
    newStack=[]
    i=pts[0]
    while i<=pts[1]:
        if opStack[i]=="pop" and len(stack1)>0:
            # "pop" moves one unconsumed variable into the live region.
            stack2=[stack1[0]]+stack2
            stack1=stack1[1:]
        else:
            # The very first operator of a fragment that starts the whole
            # stack consumes its full arity; later ones reuse one temp value.
            if len(newStack)==0 and pts[0]==0:
                tStack,stack1,stack2=stackGrab(stack1,stack2,getArity(opStack[i]))
            else:
                tStack,stack1,stack2=stackGrab(stack1,stack2,getArity(opStack[i])-1)
            newStack=newStack+tStack
        i+=1
    return newStack
|
|
274
|
+
|
|
275
|
+
def recombination2pt(model1,model2): #2 point recombination
    """Two-point crossover: swap a random operator-stack slice (and the
    matching slice of variables/constants) between two parents.

    Returns two new child models; the parents are not modified.
    """
    # Two cut points per parent, sorted so [lo, hi) is a valid slice.
    pts1=np.sort(random.sample(range(0,len(model1[0])+1),2))
    pts2=np.sort(random.sample(range(0,len(model2[0])+1),2))
    #pts1=[4,5]
    #pts2=[2,4]
    #pts1=[0,3]
    #pts2=[1,3]
    #print(pts1,pts2)
    child1=buildEmptyModel()
    child2=buildEmptyModel()

    parent1=copy.deepcopy(model1)
    parent2=copy.deepcopy(model2)
    parent1[0]=np.array(parent1[0],dtype=object).tolist()
    parent2[0]=np.array(parent2[0],dtype=object).tolist()

    # Operator stacks: splice the other parent's middle section in.
    child1[0]=np.array(parent1[0][0:pts1[0]]+parent2[0][pts2[0]:pts2[1]]+parent1[0][pts1[1]:],dtype=object)
    child2[0]=np.array(parent2[0][0:pts2[0]]+parent1[0][pts1[0]:pts1[1]]+parent2[0][pts2[1]:],dtype=object)

    # Variable stacks: listArity maps operator-slice boundaries onto
    # variable-slice boundaries; a cut at position 0 needs the +1 offset
    # because the first operator contributes its full arity.
    varPts1=[listArity(parent1[0][:(pts1[0])])+0,listArity(parent2[0][:(pts2[0])])+0,listArity(parent2[0][pts2[0]:pts2[1]]),listArity(parent1[0][pts1[0]:pts1[1]])]
    if pts1[0]==0:
        varPts1[0]+=1
    if pts2[0]==0:
        varPts1[1]+=1
    child1[1]=parent1[1][:varPts1[0]]+parent2[1][varPts1[1]:(varPts1[1]+varPts1[2]-1)]+parent1[1][(varPts1[0]+varPts1[3]-1):]

    varPts2=[listArity(parent2[0][:(pts2[0])])+0,listArity(parent1[0][:(pts1[0])])+0,listArity(parent1[0][pts1[0]:pts1[1]]),listArity(parent2[0][pts2[0]:pts2[1]])]
    if pts1[0]==0:
        varPts2[1]+=1
    if pts2[0]==0:
        varPts2[0]+=1
    child2[1]=parent2[1][:varPts2[0]]+parent1[1][varPts2[1]:(varPts2[1]+varPts2[2]-1)]+parent2[1][(varPts2[0]+varPts2[3]-1):]
    #print(varPts1,varPts2)

    return [child1,child2]
recombination2pt.__doc__ = "recombination2pt(model1,model2) does 2 point crossover and returns two children models"
|
|
311
|
+
|
|
312
|
+
def get_numeric_indices(l):
    """Indices of entries that are plain Python ints or floats.

    Exact type check (not isinstance) so bools and numpy scalars are excluded,
    matching how numeric constants are stored in model[1]."""
    return [idx for idx, item in enumerate(l) if type(item) in (int, float)]
|
|
314
|
+
|
|
315
|
+
|
|
316
|
+
def mutate(model,variables,ops=defaultOps(),const=defaultConst(),maxLength=10):
    """Return a mutated copy of `model` using one of 8 randomly chosen
    mutation operators, then repair the variable stack to match the new
    operator stack's arity. The input model is not modified.
    """
    newModel=copy.deepcopy(model)
    newModel[0]=np.array(newModel[0],dtype=object).tolist()
    mutationType=random.randint(0,7)
    varChoices=[variableSelect(i) for i in range(variables)]+const
    opChoice=0
    varChoice=0

    tmp=0

    if mutationType==0: #single operator mutation
        # NOTE(review): randint is called before the len>0 guard; an empty
        # operator stack would raise ValueError here — confirm models always
        # have at least one operator.
        opChoice=random.randint(0,len(newModel[0])-1)
        if len(newModel[0])>0:
            newModel[0][opChoice]=np.random.choice([i for i in ops] )

    elif mutationType==1: #single variable mutation
        varChoice=np.random.choice(varChoices)
        # Constant generators (named callables) are invoked; variable
        # selector lambdas are stored as-is.
        if callable(varChoice) and varChoice.__name__!='<lambda>':
            varChoice=varChoice()
        newModel[1][random.randint(0,len(newModel[1])-1)]=varChoice

    elif mutationType==2: #insertion mutation to top of stack
        opChoice=np.random.choice(ops)
        newModel[0]=[opChoice]+newModel[0]
        # Prepend variables until the stack's arity is satisfied again.
        while modelArity(newModel)>len(newModel[1]):
            varChoice=np.random.choice(varChoices)
            if callable(varChoice) and varChoice.__name__!='<lambda>':
                varChoice=varChoice()
            newModel[1]=[varChoice]+newModel[1]

    elif mutationType==3: #deletion mutation from top of stack
        if len(newModel[0])>1:
            # Keep a random-length suffix of the operator stack and the
            # matching suffix of variables.
            opChoice=random.randint(1,len(newModel[0])-1)
            newModel[0]=newModel[0][-opChoice:]
            newModel[1]=newModel[1][-listArity(newModel[0]):]

    elif mutationType==4: #insertion mutation to bottom of stack
        opChoice=np.random.choice([i for i in ops])
        newModel[0].append(opChoice)

    elif mutationType==5: #mutation via crossover with random model
        newModel=recombination2pt(newModel,generateRandomModel(variables,ops,const,maxLength))[0]

    elif mutationType==6: #single operator insertion mutation
        # Arity-1 operators (plus one "pop") can be inserted anywhere without
        # changing the stack's variable demand.
        singleOps=[op for op in ops if getArity(op)==1 and op!='pop']
        singleOps.append('pop')
        pos=random.randint(0,len(newModel[0])-1)
        newModel[0].insert(pos,np.random.choice(singleOps))

    elif mutationType==7: #nudge numeric constant
        pos=get_numeric_indices(newModel[1])
        if(len(pos)>0): #If there are numeric constants
            pos=random.choice(pos)
            newModel[1][pos]=newModel[1][pos]+np.random.normal(-1,1)

    # Repair: trim or extend the variable stack so it exactly matches the
    # (possibly changed) operator stack's arity.
    if modelArity(newModel)<len(newModel[1]):
        newModel[1]=newModel[1][:modelArity(newModel)]
    elif modelArity(newModel)>len(newModel[1]):
        newModel[1]=newModel[1]+[np.random.choice(varChoices) for i in range(modelArity(newModel)-len(newModel[1]))]
        newModel[1]=[varChoice() if callable(varChoice) and varChoice.__name__!='<lambda>' else varChoice for varChoice in newModel[1]]
    newModel[0]=np.array(newModel[0],dtype=object)
    return newModel

mutate.__doc__ = "mutate(model,variableCount,ops,constants,maxLength) mutates a model"
|
|
380
|
+
def paretoFront(fitValues):
    """Boolean mask over the rows of fitValues marking the (minimization)
    Pareto-optimal points: a row survives unless some other row is <= it in
    every objective and < in at least one."""
    keep = np.ones(len(fitValues), dtype=bool)
    for idx, row in enumerate(fitValues):
        if not keep[idx]:
            continue
        # Among current survivors, keep only those strictly better than
        # `row` in at least one objective, then re-admit `row` itself.
        keep[keep] = np.any(fitValues[keep] < row, axis=1)
        keep[idx] = True
    return keep
|
|
387
|
+
def paretoTournament(pop):
    """Return the Pareto-front subset of `pop`, judged on each model's
    quality vector (model[2])."""
    fits = np.array([member[2] for member in pop])
    mask = paretoFront(fits)
    survivors = np.array(pop, dtype=object)[mask]
    return survivors.tolist()
|
|
390
|
+
def tournamentModelSelection(models, popSize=100,tourneySize=5):
    """Pareto-tournament selection: repeatedly sample `tourneySize` models and
    keep each tournament's Pareto front until at least `popSize` models are
    collected (the result may slightly overshoot popSize).

    Fix: removed the unused local `selectionSize` from the original.
    """
    selectedModels=[]
    while len(selectedModels)<popSize:
        tournament=random.sample(models,tourneySize)
        winners=paretoTournament(tournament)
        selectedModels=selectedModels+winners

    return selectedModels
paretoTournament.__doc__ = "paretoTournament(models, inputData, responseData) returns the Pareto front of a model set"
|
|
400
|
+
def modelSameQ(model1, model2):
    """True when two models have identical operator stacks and identical
    variable/constant stacks (quality metrics are ignored)."""
    if len(model1[0]) != len(model2[0]):
        return False
    if len(model1[1]) != len(model2[1]):
        return False
    return all(model1[0] == model2[0]) and model1[1] == model2[1]

modelSameQ.__doc__ = "modelSameQ(model1,model2) checks if model1 and model2 are the same and returns True if so, else False"
|
|
403
|
+
def deleteDuplicateModels(models): #Removes any models that are the same, does not consider simplified form
    """Keep only the first occurrence of each structurally identical model
    (no algebraic simplification is attempted)."""
    uniqueMods = [models[0]]
    for candidate in models:
        if not any(modelSameQ(candidate, kept) for kept in uniqueMods):
            uniqueMods.append(candidate)
    return uniqueMods

deleteDuplicateModels.__doc__ = "deleteDuplicateModels(models) deletes models that have the same form without simplifying"
|
|
416
|
+
|
|
417
|
+
def deleteDuplicateModelsPhenotype(models): #Removes any models that are the same regarding phenotype, does not consider simplified form
    """Keep only the first occurrence of each distinct symbolic (phenotype)
    form, comparing the sympy expressions produced by printGPModel."""
    knownForms = [printGPModel(models[0])]
    keepIndices = [0]
    for offset, form in enumerate(printGPModel(m) for m in models[1:]):
        if not any(form == known for known in knownForms):
            knownForms.append(form)
            keepIndices.append(offset + 1)
    return [models[i] for i in keepIndices]
|
|
433
|
+
|
|
434
|
+
def removeIndeterminateModels(models): #Removes models from the population that evaluate to nonreal values
    """Drop models whose quality metrics (model[2]) contain NaN or +/-inf.

    Fix: the original tested np.isfinite(np.isnan(i[2])) — isfinite of a
    boolean array is always True, so infinite fitness values were never
    filtered. Now tests isfinite on the metrics themselves.
    """
    return [i for i in models if (not any(np.isnan(i[2]))) and all(np.isfinite(i[2]))]
removeIndeterminateModels.__doc__ = "removeIndeterminateModels(models) removes models that have a fitness that results from inf or nan values"
|
|
437
|
+
def sortModels(models):
    """Return models sorted ascending by their quality vector (model[2]);
    with the default metrics that means best fitness first, complexity as
    tie-breaker."""
    ranked = list(models)
    ranked.sort(key=lambda entry: entry[2])
    return ranked

sortModels.__doc__ = "sortModels(models) sorts a model population by the models' accuracies"
|
|
440
|
+
def selectModels(models, selectionSize=0.5):
    """Elitist selection: iteratively peel successive Pareto fronts off a
    copy of `models` until at least `selection` models are gathered.

    selectionSize <= 1 is treated as a fraction of the population; larger
    values are an absolute count. The last front taken may overshoot.
    """
    tMods=copy.deepcopy(models)
    # list.remove below needs list-form operator stacks for == comparison.
    [modelToListForm(mod) for mod in tMods]
    paretoModels=[]
    if selectionSize<=1:
        selection=selectionSize*len(models)
    else:
        selection=selectionSize

    while len(paretoModels)<selection:
        front=paretoTournament(tMods)
        paretoModels=paretoModels+front
        for i in front:
            tMods.remove(i)
    [modelRestoreForm(mod) for mod in paretoModels]
    return paretoModels
selectModels.__doc__ = "selectModels(models, selectionSize=0.5) iteratively selects the Pareto front of a model population until n or n*popSize models are selected"
|
|
457
|
+
def stackVarUsage(opStack):
    """Number of variables/constants the operator stack will consume:
    the first operator contributes its full arity, each later operator
    arity-1, and each "pop" exactly one."""
    needed = getArity(opStack[0])
    for op in opStack[1:]:
        if op == 'pop':
            needed += 1
        else:
            needed += getArity(op) - 1
    return needed

stackVarUsage.__doc__ = "stackVarUsage(opStack) is a helper function that determines how many variables/constants are needed by the operator stack"
|
|
465
|
+
def trimModel(mod): #Removes extra pop operators that do nothing
    """Return a copy of `mod` with trailing 'pop' operators removed — those
    executed after every variable has already been consumed have no effect.

    Walks the operator stack simulating variable consumption (varStack) and
    temp-stack depth (tempStack); index i ends at the first operator past
    which no variables remain.
    """
    model=copy.deepcopy(mod)
    i=0
    varStack=len(mod[1])
    tempStack=0
    # First operator always consumes its full arity from the variable stack.
    varStack-=getArity(model[0][i])
    tempStack+=1
    i+=1
    while varStack>0:
        if model[0][i]=='pop':
            varStack-=1
            tempStack+=1
        else:

            # An operator pulls from the variable stack only what the temp
            # stack cannot supply.
            take=getArity(model[0][i])-tempStack
            if take>0:
                varStack-=take
                tempStack=1
            else:
                tempStack-=getArity(model[0][i])-1
        i+=1
    # Keep everything up to i; drop any 'pop' found after all variables are used.
    model[0]=np.array(model[0][:i].tolist()+[j for j in model[0][i:] if not j=='pop'],dtype=object)
    return model
trimModel.__doc__ = "trimModel(model) trims extra pop operators off the operator stack so that further modifications such as a model alignment aren't altered by those pop operators"
|
|
489
|
+
def alignGPModel(model, data, response): #Aligns a model
    """Linearly align a model: fit response ~ a*f(x) + b and append the
    operators/constants that apply that affine correction to the model.

    Returns the original model unchanged when predictions are non-finite,
    constant, or the polyfit fails; otherwise returns the aligned copy with
    its quality metrics refreshed.
    """
    prediction=evaluateGPModel(model,data)
    if (not all(np.isfinite(np.array(prediction)))) or np.all(prediction==prediction[0]):
        return model
    if np.isnan(np.array(prediction)).any() or np.isnan(np.array(response)).any() or not np.isfinite(np.array(prediction,dtype=np.float32)).all():
        return model
    try:
        # Degree-1 fit: align[0] is the slope a, align[1] the intercept b.
        align=np.round(np.polyfit(prediction,response,1,rcond=1e-16),decimals=14)
    except np.linalg.LinAlgError:
        #print("Alignment failed for: ", model, " with prediction: ", prediction, "and reference data: ", response)
        return model
    # Trim dead 'pop' operators first so the appended mult/add consume the
    # two new constants rather than leftover stack noise.
    newModel=trimModel(model)
    newModel[0]=np.array(newModel[0].tolist()+[mult,add],dtype=object)
    newModel[1]=newModel[1]+align.tolist()
    setModelQuality(newModel,data,response)
    return newModel
alignGPModel.__doc__ = "alignGPModel(model, input, response) aligns a model such that response-a*f(x)+b are minimized over a and b"
|
|
506
|
+
def evolve(inputData, responseData, generations=100, ops=defaultOps(), const=defaultConst(), variableNames=[], mutationRate=79, crossoverRate=11, spawnRate=10, extinction=False,extinctionRate=10,elitismRate=50,popSize=300,maxComplexity=100,align=True,initialPop=[],timeLimit=300,capTime=False,tourneySize=5,tracking=False,modelEvaluationMetrics=[fitness,stackGPModelComplexity],dataSubsample=False,samplingMethod=randomSubsample):
    """Main evolutionary loop: evolve a population of stack-GP models against
    (inputData, responseData) and return the final population, sorted and
    (optionally) linearly aligned.

    Rates (mutationRate, crossoverRate, spawnRate, elitismRate) are percents
    of popSize. `variableNames` is currently unused here. NOTE: mutable
    default arguments (lists) are shared across calls but only read.
    """
    fullInput,fullResponse=copy.deepcopy(inputData),copy.deepcopy(responseData)
    inData=copy.deepcopy(fullInput)
    resData=copy.deepcopy(fullResponse)
    variableCount=varCount(inData)
    models=initializeGPModels(variableCount,ops,const,popSize)
    models=models+initialPop
    startTime=time.perf_counter()
    bestFits=[]
    for i in range(generations):
        if capTime and time.perf_counter()-startTime>timeLimit:
            break
        if dataSubsample:
            # Re-sample the training subset every generation.
            inData,resData=samplingMethod(fullInput,fullResponse)
        for mods in models:
            setModelQuality(mods,inData,resData,modelEvaluationMetrics=modelEvaluationMetrics)
        models=removeIndeterminateModels(models)
        if tracking:
            bestFits.append(min([mods[2][0] for mods in paretoTournament(models)]))

        #paretoModels=paretoTournament(models)
        # Elites carried over unchanged into the next generation.
        paretoModels=selectModels(models,elitismRate/100*popSize if elitismRate/100*popSize<len(models) else len(models))
        # NOTE(review): `i%extinctionRate` is truthy for every generation NOT
        # divisible by extinctionRate — `i%extinctionRate==0` (periodic
        # extinction) was probably intended; confirm before changing.
        if extinction and i%extinctionRate:
            models=initializeGPModels(variableCount,ops,const,popSize)
            for mods in models:
                setModelQuality(mods,inData,resData,modelEvaluationMetrics=modelEvaluationMetrics)

        models=tournamentModelSelection(models,popSize,tourneySize)

        crossoverPairs=random.sample(models,round(crossoverRate/100*popSize))
        toMutate=random.sample(models,round(mutationRate/100*popSize))

        childModels=paretoModels

        # NOTE(review): the -1 here skips the last crossover pair; confirm
        # whether range(round(len(crossoverPairs)/2)) was intended.
        for j in range(round(len(crossoverPairs)/2)-1):
            childModels=childModels+recombination2pt(crossoverPairs[j],crossoverPairs[j+round(len(crossoverPairs)/2)])

        for j in toMutate:
            childModels=childModels+[mutate(j,variableCount,ops,const)]

        # Fresh random models keep diversity up.
        childModels=childModels+initializeGPModels(variableCount,ops,const,round(spawnRate/100*popSize))

        childModels=deleteDuplicateModels(childModels)
        childModels=[model for model in childModels if stackGPModelComplexity(model)<maxComplexity]

        #for mods in childModels:
        #    setModelQuality(mods,inData,resData,modelEvaluationMetrics=modelEvaluationMetrics)
        #childModels=removeIndeterminateModels(childModels)

        # Top the population back up to popSize with random models.
        if len(childModels)<popSize:
            childModels=childModels+initializeGPModels(variableCount,ops,const,popSize-len(childModels))

        models=copy.deepcopy(childModels)

    # Final pass: score on the FULL data set, clean up, sort, and align.
    for mods in models:
        setModelQuality(mods,fullInput,fullResponse,modelEvaluationMetrics=modelEvaluationMetrics)
    models=[trimModel(mod) for mod in models]
    models=deleteDuplicateModels(models)
    models=removeIndeterminateModels(models)
    models=sortModels(models)
    if align:
        models=[alignGPModel(mods,fullInput,fullResponse) for mods in models]

    if tracking:
        bestFits.append(min([mods[2][0] for mods in paretoTournament(models)]))
        plt.figure()
        plt.plot(bestFits)
        plt.title("Fitness over Time")
        plt.xlabel("Generations")
        plt.ylabel("Fitness")
        plt.show()

    return models
|
|
581
|
+
|
|
582
|
+
|
|
583
|
+
def replaceFunc(stack, f1, f2):
    """Return a copy of `stack` with every occurrence of f1 replaced by f2."""
    return [f2 if item == f1 else item for item in stack]
|
|
585
|
+
def printGPModel(mod,inputData=symbols(["x"+str(i) for i in range(100)])): #Evaluates a model algebraically
    """Evaluate a model symbolically: numpy operators are swapped for their
    sympy equivalents and the stack is run on sympy symbols x0..x99 (or on
    the provided `inputData` symbols), yielding a sympy expression.
    """
    def inv1(a):
        return a**(-1)
    from sympy import tan as tan1, exp as exp1, sqrt as sqrt1, sin as sin1, cos as cos1, acos, asin, atan, tanh as tanh1, log as log1
    # Wrappers so getArity via inspect.signature sees a single parameter.
    def sqrt2(a):
        return sqrt1(a)
    def log2(a):
        return log1(a)
    model = copy.deepcopy(mod)
    model[0] = replaceFunc(model[0],exp,exp1)
    model[0] = replaceFunc(model[0],tan,tan1)
    model[0] = replaceFunc(model[0],sqrt,sqrt2)
    model[0] = replaceFunc(model[0],inv,inv1)
    model[0] = replaceFunc(model[0],sin,sin1)
    model[0] = replaceFunc(model[0],cos,cos1)
    model[0] = replaceFunc(model[0],arccos,acos)
    model[0] = replaceFunc(model[0],arcsin,asin)
    model[0] = replaceFunc(model[0],arctan,atan)
    model[0] = replaceFunc(model[0],tanh,tanh1)
    model[0] = replaceFunc(model[0],log,log2)
    response=evModHelper(model[1],model[0],[],np.array(inputData))[2][0]
    return response
|
|
607
|
+
|
|
608
|
+
def ensembleSelect(models, inputData, responseData, numberOfClusters=10): #Generates a model ensemble using input data partitions
    """Build an ensemble: cluster the data records with KMeans, then for each
    cluster pick the best-fitting model not already chosen.

    Fix: the original argsorted modelResiduals[i] — model i's residuals
    ACROSS clusters — and then used those cluster indices as model indices.
    It now ranks all models on each cluster (column i of the residual matrix).
    """
    data=np.transpose(inputData)
    if len(data)<numberOfClusters:
        numberOfClusters=len(data)
    clusters=KMeans(n_clusters=numberOfClusters).fit_predict(data)
    # KMeans can return fewer distinct labels than requested; re-fit if so.
    if numberOfClusters>len(set(clusters)):
        numberOfClusters=len(set(clusters))
        clusters=KMeans(n_clusters=numberOfClusters).fit_predict(data)
    dataParts=[[] for _ in range(numberOfClusters)]
    partsResponse=[[] for _ in range(numberOfClusters)]
    for i in range(len(clusters)):
        dataParts[clusters[i]].append(data[i])
        partsResponse[clusters[i]].append(responseData[i])

    # modelResiduals[m][c] = fitness of model m on cluster c.
    modelResiduals=[[fitness(models[m],np.transpose(dataParts[c]),partsResponse[c])
                     for c in range(numberOfClusters)]
                    for m in range(len(models))]

    best=[]
    for c in range(numberOfClusters):
        # Rank MODELS by their residual on cluster c (was: argsort of
        # modelResiduals[c], i.e. model c's per-cluster residuals).
        ordering=np.argsort([modelResiduals[m][c] for m in range(len(models))])
        j=0
        while ordering[j] in best:
            j+=1
        best.append(ordering[j])
    ensemble=[models[b] for b in best]

    return ensemble
|
|
644
|
+
def uncertainty(data, trim=0.3):
    """Return the differential entropy of *data*, or 0 when the estimate
    is not finite (e.g. for constant samples).

    Note: *trim* is accepted for call-site compatibility but is unused.
    """
    # Small samples need an explicit window length for the entropy estimator.
    window = 1 if len(data) <= 4 else None
    entropy = differential_entropy(data, window_length=window)
    return entropy if np.isfinite(entropy) else 0
|
|
653
|
+
|
|
654
|
+
def evaluateModelEnsemble(ensemble, inputData):
    """Evaluate every ensemble member on *inputData* and return the
    prediction uncertainties (one value per data point)."""
    predictions = [evaluateGPModel(member, inputData) for member in ensemble]
    if type(predictions[0]) == np.ndarray:
        # Vector responses: transpose so each row collects all model
        # outputs for one data point, then score each point separately.
        perPoint = np.transpose(predictions)
        return [uncertainty(row, 0) for row in perPoint]
    # Scalar responses: a single uncertainty over the model outputs.
    return [uncertainty(predictions, 0)]
|
|
664
|
+
def relativeEnsembleUncertainty(ensemble, inputData):
    """Return the ensemble's per-point prediction uncertainties as a numpy array."""
    return np.array(evaluateModelEnsemble(ensemble, inputData))
|
|
667
|
+
|
|
668
|
+
def createUncertaintyFunc(ensemble):
    """Return a callable mapping input data to the NEGATED ensemble
    uncertainty, so that minimizing it maximizes the uncertainty."""
    def negatedUncertainty(x):
        return -relativeEnsembleUncertainty(ensemble, x)
    return negatedUncertainty
|
|
670
|
+
|
|
671
|
+
def maximizeUncertainty(ensemble, varCount, bounds=None):  # Used to select a new point of maximum uncertainty
    """Find the input point where the ensemble disagrees the most.

    ensemble : list of models fed to the uncertainty objective
    varCount : number of input variables
    bounds   : optional sequence of (low, high) pairs, one per variable;
               None or [] means an unbounded search

    Returns the optimizer's solution point (array of length varCount).
    """
    func = createUncertaintyFunc(ensemble)
    if bounds is None or len(bounds) == 0:
        # BUG FIX: the original computed x0 from bounds BEFORE checking for
        # an empty bounds list, so the default call raised IndexError.
        # With no bounds, start the unconstrained search from the origin.
        x0 = np.zeros(varCount)
        return minimize(func, x0).x
    # Start from the midpoint of each variable's bounds.
    x0 = [np.mean(bounds[i]) for i in range(varCount)]
    return minimize(func, x0, bounds=bounds).x
|
|
679
|
+
def extendData(data, newPoint):
    """Append *newPoint* as a new observation (column) to *data*, which is
    shaped (variables, observations)."""
    observations = data.T
    extended = np.concatenate((observations, np.array([newPoint])))
    return extended.T
|
|
681
|
+
|
|
682
|
+
def activeLearningCheckpoint(eqNum, version, i, inputData, response, testInput, testResponse, errors, models, minerr):
    """Persist the active-learning loop state to the file <eqNum>/<version>
    with dill so a later run can resume from it.

    FIX: use a context manager so the file handle is closed even when
    dill.dump raises (the original leaked the handle on failure).
    """
    path = os.path.join(str(eqNum), str(version))
    with open(path, "wb+") as file:
        dill.dump([i, inputData, response, testInput, testResponse, errors, models, minerr], file)
|
|
687
|
+
def activeLearningCheckpointLoad(eqNum, version, i, inputData, response, testInput, testResponse, errors, models, minerr):
    """Restore the active-learning state from <eqNum>/<version> if a
    checkpoint file exists; otherwise return the caller-supplied state
    unchanged."""
    path = os.path.join(str(eqNum), str(version))
    try:
        with open(path, 'rb') as f:
            state = dill.load(f)
    except FileNotFoundError:
        # No checkpoint yet: hand back exactly what was passed in.
        state = [i, inputData, response, testInput, testResponse, errors, models, minerr]
    return tuple(state)
|
|
695
|
+
def subSampleSpace(space):
    """Return a random sub-box of *space* (a sequence of (low, high) pairs):
    for each dimension, two uniform draws inside the original interval
    become the new, sorted (low, high) bounds."""
    shrunk = []
    for bounds in space:
        first = np.random.uniform(bounds[0], bounds[1])
        second = np.random.uniform(bounds[0], bounds[1])
        lo, hi = sorted((first, second))
        shrunk.append((lo, hi))
    return tuple(shrunk)
|
|
702
|
+
|
|
703
|
+
def activeLearning(func, dims, ranges,rangesP,eqNum=1,version=1,iterations=100): #func should be a lambda function of form lambda data: f(data[0],data[1],...)
    """Active-learning symbolic-regression loop.

    func       : target function, called as func(data) with data shaped (dims, n)
    dims       : number of input variables
    ranges     : per-variable (low, high) ranges for initial/test sampling
    rangesP    : per-variable bounds used when maximizing ensemble uncertainty
    eqNum, version : identify the checkpoint/result path <eqNum>/<version>
    iterations : maximum number of active-learning rounds

    Returns 3+i (total training points used) when a near-perfect model is
    found, or -1 when the run fails or a finished-result file already exists.
    """
    # A "<path>.txt" result file marks a completed run: skip it entirely.
    try:
        with open(os.path.join(str(eqNum),str(version))+".txt",'rb') as f:
            return -1
    except FileNotFoundError:
        pass
    inputData=[]
    testInput=[]
    found=False
    # Seed with 3 random training points and 200 random test points per variable.
    for i in range(dims):
        inputData.append(np.random.uniform(ranges[i][0],ranges[i][1],3))
        testInput.append(np.random.uniform(ranges[i][0],ranges[i][1],200))
    inputData=np.array(inputData)
    testInput=np.array(testInput)
    response=func(inputData)
    testResponse=func(testInput)
    errors=[]
    models=[]
    minerr=1
    for i in range(iterations):
        print("input: ",inputData)
        print("\n response: ",response)
        # Resume from a checkpoint if one exists; this overwrites the whole
        # loop state, including the loop counter i.
        i,inputData,response,testInput,testResponse,errors,models,minerr=activeLearningCheckpointLoad(eqNum,version,i,inputData,response,testInput,testResponse,errors,models,minerr)
        if i>iterations-1:
            break
        i+=1
        # Four independent evolutionary runs, each seeded with the current
        # model population, pooled to increase diversity.
        models1=evolve(inputData,response,initialPop=models,generations=1000,tracking=False,popSize=300,ops=allOps(),timeLimit=120,capTime=True,align=False,elitismRate=10)
        models2=evolve(inputData,response,initialPop=models,generations=1000,tracking=False,popSize=300,ops=allOps(),timeLimit=120,capTime=True,align=False,elitismRate=10)
        models3=evolve(inputData,response,initialPop=models,generations=1000,tracking=False,popSize=300,ops=allOps(),timeLimit=120,capTime=True,align=False,elitismRate=10)
        models4=evolve(inputData,response,initialPop=models,generations=1000,tracking=False,popSize=300,ops=allOps(),timeLimit=120,capTime=True,align=False,elitismRate=10)
        models=models1+models2+models3+models4
        models=selectModels(models,20)
        alignedModels=[alignGPModel(mods,inputData,response) for mods in models]
        ensemble=ensembleSelect(alignedModels,inputData,response)
        # Query the point of maximum ensemble uncertainty; if it duplicates
        # an existing training point, retry inside a random sub-box.
        out=maximizeUncertainty(ensemble,dims,rangesP)
        while out in inputData.T:
            out=maximizeUncertainty(ensemble,dims,subSampleSpace(rangesP))
        inputData=extendData(inputData,out)
        response=func(inputData)
        # Best test-set fitness among the aligned models, NaNs excluded.
        fitList=np.array([fitness(mod,testInput,testResponse) for mod in alignedModels])
        errors.append(min(fitList[np.logical_not(np.isnan(fitList))]))
        minerr=errors[-1]
        if minerr<1e-14:
            # Success: persist the final state and write the result file
            # whose existence marks the run as complete.
            #print("Points needed in round", j,": ",3+i, " Time needed: ", time.perf_counter()-roundTime)
            if not os.path.exists(str(eqNum)):
                os.makedirs(str(eqNum))
            path=os.path.join(str(eqNum),str(version))
            file=open(path,"wb+")
            dill.dump([i,inputData,response,testInput,testResponse,errors,models,minerr],file)
            file.close()
            file=open(path+'.txt','w+')
            file.write(str(i+3)+'\n')
            file.write(str(errors))
            file.close()
            return 3+i
            # NOTE(review): the three statements below are unreachable (they
            # follow the return) and 'ptsNeeded' is not defined in this file.
            found=True
            ptsNeeded.append(3+i)
            break
        activeLearningCheckpoint(eqNum,version,i,inputData,response,testInput,testResponse,errors,models,minerr)
    if found==False:
        # Budget exhausted without success: record a failure marker (-1) and
        # still write the result file so the run is not repeated.
        #print("Points needed in round",j,": NA (model not found)")
        path=os.path.join(str(eqNum),str(version))
        file=open(path,"wb")
        dill.dump([-1,inputData,response,testInput,testResponse,errors,models,minerr],file)
        file.close()
        file=open(path+'.txt',"w+")
        file.write(str(i+3)+"\n")
        file.write(str(errors))
        file.close()
        return -1
|
|
773
|
+
|
|
774
|
+
def plotModels(models):
    """Interactive accuracy-vs-complexity scatter plot of a model population.

    Pareto-front models are drawn in red, the rest in blue. Hovering over a
    point shows the printed form of that model in a tooltip.
    """
    # Work on a copy: modelToListForm/modelRestoreForm mutate the models.
    tMods=copy.deepcopy(models)
    [modelToListForm(mod) for mod in tMods]
    paretoModels=paretoTournament(tMods)
    # Separate the Pareto front from the remaining models.
    for i in paretoModels:
        tMods.remove(i)
    [modelRestoreForm(mod) for mod in paretoModels]
    [modelRestoreForm(mod) for mod in tMods]

    # mod[2] holds (accuracy, complexity) quality scores.
    pAccuracies=[mod[2][0] for mod in paretoModels]
    pComplexities=[mod[2][1] for mod in paretoModels]

    # Non-Pareto first, Pareto appended — the color list below matches this order.
    accuracies=[mod[2][0] for mod in tMods]+pAccuracies
    complexities=[mod[2][1] for mod in tMods]+pComplexities
    colors=['blue' for i in range(len(tMods))]+['red' for i in range(len(pAccuracies))]

    fig,ax = plt.subplots()

    sc=plt.scatter(complexities,accuracies,color=colors)
    plt.xlabel("Complexity")
    plt.ylabel("1-R**2")
    # Printable model forms, indexed the same way as the scatter points.
    names=[str(printGPModel(mod)) for mod in tMods]+[str(printGPModel(mod)) for mod in paretoModels]

    # Reusable annotation used as the hover tooltip; hidden until needed.
    label = ax.annotate("", xy=(0,0), xytext=(np.min(complexities),np.mean([np.max(accuracies),np.min(accuracies)])),
                bbox=dict(boxstyle="round", fc="w"),
                arrowprops=dict(arrowstyle="->"))
    label.set_visible(False)

    def update_labels(ind):
        # Move the tooltip to the hovered point and fill in its model text.
        pos = sc.get_offsets()[ind["ind"][0]]
        label.xy = pos
        text = "{}".format(" ".join([names[n] for n in [ind["ind"][0]]]))
        label.set_text(text)
        label.get_bbox_patch().set_facecolor('grey')
        label.get_bbox_patch().set_alpha(0.9)


    def hover(event):
        # Show the tooltip while the cursor is over a point, hide it otherwise.
        vis = label.get_visible()
        if event.inaxes == ax:
            cont, ind = sc.contains(event)
            if cont:
                update_labels(ind)
                label.set_visible(True)
                fig.canvas.draw_idle()
            else:
                if vis:
                    label.set_visible(False)
                    fig.canvas.draw_idle()

    fig.canvas.mpl_connect("motion_notify_event", hover)

    plt.show()
|
|
828
|
+
|
|
829
|
+
def plotModelResponseComparison(model, inputData, response, sort=False):
    """Scatter the true response and the model prediction against data index.

    Note: *sort* is accepted for call-site compatibility but is unused.
    """
    indices = range(len(response))
    plt.scatter(indices, response, label="True Response")
    plt.scatter(indices, evaluateGPModel(model, inputData), label="Model Prediction")
    plt.legend()
    plt.xlabel("Data Index")
    plt.ylabel("Response Value")
    plt.show()
|
|
836
|
+
def plotPredictionResponseCorrelation(model, inputData, response):
    """Plot predicted vs. true response with a perfect-correlation reference
    line (y = x)."""
    predictions = evaluateGPModel(model, inputData)
    plt.scatter(response, predictions, label="Model")
    # Plotting response against itself draws the y = x reference line.
    plt.plot(response, response, label="Perfect Correlation", color='green')
    plt.xlabel("True Response")
    plt.ylabel("Predicted Response")
    plt.legend()
    plt.show()
|
|
843
|
+
#Plot model complexity distribution
|
|
844
|
+
def plotModelComplexityDistribution(models):
    """Histogram model complexities, split into Pareto-front and other models."""
    # Work on a copy: the list-form conversion mutates the models.
    working = copy.deepcopy(models)
    [modelToListForm(mod) for mod in working]
    front = paretoTournament(working)
    for member in front:
        working.remove(member)
    [modelRestoreForm(mod) for mod in front]
    [modelRestoreForm(mod) for mod in working]
    # mod[2][1] is the complexity score.
    plt.hist([mod[2][1] for mod in working], label="Non-Pareto Models")
    plt.hist([mod[2][1] for mod in front], label="Pareto Models")
    plt.xlabel("Model Complexity")
    plt.ylabel("Frequency")
    plt.legend()
    plt.show()
|
|
860
|
+
#Plot model accuracy distribution
|
|
861
|
+
def plotModelAccuracyDistribution(models):
    """Histogram model accuracies, split into Pareto-front and other models."""
    # Work on a copy: the list-form conversion mutates the models.
    working = copy.deepcopy(models)
    [modelToListForm(mod) for mod in working]
    front = paretoTournament(working)
    for member in front:
        working.remove(member)
    [modelRestoreForm(mod) for mod in front]
    [modelRestoreForm(mod) for mod in working]
    # mod[2][0] is the accuracy score.
    plt.hist([mod[2][0] for mod in working], label="Non-Pareto Models")
    plt.hist([mod[2][0] for mod in front], label="Pareto Models")
    plt.xlabel("Model Accuracy")
    plt.ylabel("Frequency")
    plt.legend()
    plt.show()
|
|
877
|
+
#Plot model residuals relative to response
|
|
878
|
+
def plotModelResiduals(model, input, response):
    """Scatter the model residuals (prediction - response) against the response.

    Note: the parameter name ``input`` shadows the builtin but is kept for
    call-site compatibility.
    """
    residuals = evaluateGPModel(model, input) - response
    plt.scatter(response, residuals)
    plt.xlabel("Response")
    plt.ylabel("Residual")
    plt.show()
|
|
883
|
+
#Plot model residual distribution
|
|
884
|
+
def plotModelResidualDistribution(model, input, response):
    """Histogram the model residuals (prediction - response).

    Note: the parameter name ``input`` shadows the builtin but is kept for
    call-site compatibility.
    """
    residuals = evaluateGPModel(model, input) - response
    plt.hist(residuals)
    plt.xlabel("Residual")
    plt.ylabel("Frequency")
    plt.show()
|
|
889
|
+
#Plot the presence of variables in a model population
|
|
890
|
+
def plotVariablePresence(models, variables=["x"+str(i) for i in range(100)], sort=False):
    """Bar-chart how often each variable appears across a model population.

    models    : list of GP models (variable/constant stack at index 1)
    variables : candidate variable names to count
    sort      : sort bars by descending frequency
    """
    # Substitute variable names into each model's stack, then drop numeric
    # constants so only variable symbols remain. type() (not isinstance) is
    # used deliberately to match the original filtering semantics.
    stacks = [varReplace(model[1], variables) for model in models]
    flattened = []
    for stack in stacks:
        for entry in stack:
            if type(entry) != int and type(entry) != float:
                flattened.append(entry)
    counts = [flattened.count(v) for v in variables]
    # Keep only variables that occur at least once, preserving pairing.
    used = [v for v, c in zip(variables, counts) if c > 0]
    freqs = [c for c in counts if c > 0]
    if sort:
        order = np.argsort(freqs)[::-1]
        used = [used[k] for k in order]
        freqs = [freqs[k] for k in order]
    plt.bar(used, freqs)
    plt.xlabel("Variable")
    plt.ylabel("Frequency")
    plt.show()
|
|
910
|
+
def replaceOpsWithStrings(opStack):
    """Return a copy of *opStack* with operator functions replaced by
    printable string names (used for operator-frequency reporting)."""
    # (operator, display name) pairs, applied in the original replacement order.
    replacements = [
        (exp, "exp"), (tan, "tan"), (sqrt, "sqrt"), (inv, "1/#"),
        (sin, "sin"), (cos, "cos"), (arccos, "acos"), (arcsin, "asin"),
        (arctan, "atan"), (tanh, "tanh"), (log, "log"), (add, "+"),
        (mult, "*"), (sub, "-"), (protectDiv, "/"), (sqrd, "^2"),
    ]
    model = copy.deepcopy(opStack)
    for op, name in replacements:
        model = replaceFunc(model, op, name)
    return model
|
|
929
|
+
#Plot the presence of operators in a model population
|
|
930
|
+
def plotOperatorPresence(models, sort=False, excludePop=True):
    """Bar-chart how often each operator appears across a model population.

    models     : list of GP models (operator stack at index 0)
    sort       : sort bars by descending frequency
    excludePop : drop the bookkeeping 'pop' operator from the chart
    """
    ops = [replaceOpsWithStrings(model[0]) for model in models]
    # Merge the per-model stacks into one flat list of operator names.
    ops = [j for i in ops for j in i]
    # Deduplicate to get the set of distinct operators.
    uniqueOps = list(set(ops))
    if excludePop:
        # BUG FIX: the original called uniqueOps.remove('pop') unconditionally,
        # raising ValueError whenever no model contained the 'pop' operator.
        if 'pop' in uniqueOps:
            uniqueOps.remove('pop')
    # Count frequency of each operator in the flattened list.
    opFreqs = [ops.count(i) for i in uniqueOps]
    # Keep only operators that appear at least once.
    opsUsed = [str(uniqueOps[i]) for i in range(len(opFreqs)) if opFreqs[i] > 0]
    opFreqs = [opFreqs[i] for i in range(len(opFreqs)) if opFreqs[i] > 0]
    if sort:
        order = np.argsort(opFreqs)[::-1]
        opsUsed = [opsUsed[i] for i in order]
        opFreqs = [opFreqs[i] for i in order]
    plt.bar(opsUsed, opFreqs)
    plt.xticks(rotation=0)
    plt.xlabel("Operator")
    plt.ylabel("Frequency")
    plt.show()
|
|
955
|
+
|
|
956
|
+
############################
|
|
957
|
+
#Sharpness Computations
|
|
958
|
+
############################
|
|
959
|
+
|
|
960
|
+
def sharpnessConstants(model, inputData, responseData, numPerturbations=10, percentPerturbation=0.2):
    """Estimate fitness sensitivity to random perturbation of the model's
    numeric constants: returns the std-dev of fitness over perturbed copies."""
    fits = []
    for _ in range(numPerturbations):
        perturbed = copy.deepcopy(model)
        # Jitter each numeric parameter by up to +/- percentPerturbation/2
        # of its value; callable entries are left untouched.
        perturbed[1] = [
            param if callable(param)
            else param * (1 + percentPerturbation * (np.random.uniform() - 0.5))
            for param in model[1]
        ]
        fits.append(fitness(perturbed, inputData, responseData))
    return np.std(fits)
|
|
971
|
+
|
|
972
|
+
def sharpnessData(model, inputData, responseData, numPerturbations=10, percentPerturbation=0.2, preserveSign=False):
    """Estimate fitness sensitivity to random perturbation of the input data:
    returns the std-dev of fitness over perturbed copies of the data."""
    fits = []
    for _ in range(numPerturbations):
        # Jitter each variable vector by up to +/- half of
        # percentPerturbation times its own standard deviation.
        jittered = np.array([
            vec + percentPerturbation * np.std(vec) * (np.random.uniform(size=len(vec)) - 0.5)
            for vec in copy.deepcopy(inputData)
        ])
        if preserveSign:
            # For variables whose sign is constant in the original data,
            # force the perturbed values back onto that sign.
            signs = [np.unique(var) for var in np.sign(inputData)]
            jittered = [
                signs[k] * abs(jittered[k]) if len(signs[k]) == 1 else jittered[k]
                for k in range(len(signs))
            ]
        fits.append(fitness(model, jittered, responseData))
    return np.std(fits)
|
|
985
|
+
|
|
986
|
+
def totalSharpness(model, inputData, responseData, numPerturbations=10, percentPerturbation=0.2, preserveSign=False):
    """Combined sharpness score: constant-perturbation sensitivity plus
    data-perturbation sensitivity."""
    constantPart = sharpnessConstants(model, inputData, responseData,
                                      numPerturbations=numPerturbations,
                                      percentPerturbation=percentPerturbation)
    dataPart = sharpnessData(model, inputData, responseData,
                             numPerturbations=numPerturbations,
                             percentPerturbation=percentPerturbation,
                             preserveSign=preserveSign)
    return constantPart + dataPart
|
|
989
|
+
|
|
990
|
+
############################
|
|
991
|
+
#Multiple Independent Searches
|
|
992
|
+
############################
|
|
993
|
+
def runEpochs(x, y, epochs=5, **kwargs):
    """Run *epochs* independent evolutionary searches and return the pooled,
    sorted model population. Extra keyword arguments are forwarded to evolve."""
    pooled = []
    for _ in range(epochs):
        pooled.extend(evolve(x, y, **kwargs))
    return sortModels(pooled)
|
StackGP/__init__.py
ADDED
|
File without changes
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2023 Nathan Haut
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: StackGP
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: A stack-based genetic programming system
|
|
5
|
+
Author-email: Nathan Haut <hautnath@msu.edu>
|
|
6
|
+
Project-URL: Homepage, https://github.com/hoolagans/StackGP
|
|
7
|
+
Project-URL: Issues, https://github.com/hoolagans/StackGP/issues
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
+
Classifier: Operating System :: OS Independent
|
|
11
|
+
Requires-Python: >=3.8
|
|
12
|
+
Description-Content-Type: text/markdown
|
|
13
|
+
License-File: LICENSE
|
|
14
|
+
|
|
15
|
+
# StackGP
|
|
16
|
+
A stack-based genetic programming system in Python
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
## Publications using StackGP:
|
|
21
|
+
|
|
22
|
+
1. Active Learning Improves Performance on Symbolic Regression Tasks in StackGP https://dl.acm.org/doi/10.1145/3520304.3528941
|
|
23
|
+
2. Correlation Versus RMSE Loss Functions in Symbolic Regression Tasks https://link.springer.com/chapter/10.1007/978-981-19-8460-0_2
|
|
24
|
+
3. Active Learning Informs Symbolic Regression Model Development in Genetic Programming https://doi.org/10.1145/3583133.3590577
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
StackGP/StackGP.py,sha256=8ZfawyMagpl-SpG8Ko4AY9MjJx5OuxSqnaJuCUYONG0,42145
|
|
2
|
+
StackGP/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
3
|
+
StackGP-0.0.1.dist-info/LICENSE,sha256=oP5zZM8kaqnHjiDmyzI6FBUMaE3U6Ay2EBlQW8P5AQE,1068
|
|
4
|
+
StackGP-0.0.1.dist-info/METADATA,sha256=3S6Fz7zBR_Pz3T6Hnyq9Ttu-PknuwdFr6NJ-pqLuC7E,991
|
|
5
|
+
StackGP-0.0.1.dist-info/WHEEL,sha256=-oYQCr74JF3a37z2nRlQays_SX2MqOANoqVjBBAP2yE,91
|
|
6
|
+
StackGP-0.0.1.dist-info/top_level.txt,sha256=ZMnq1T0y1D49ZgYmvTDZDUx7lp_AInDPCgDJwODya-4,8
|
|
7
|
+
StackGP-0.0.1.dist-info/RECORD,,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
StackGP
|