StackGP 0.0.4__tar.gz → 0.0.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {stackgp-0.0.4 → stackgp-0.0.6}/LICENSE +1 -1
- {stackgp-0.0.4 → stackgp-0.0.6}/PKG-INFO +3 -2
- {stackgp-0.0.4 → stackgp-0.0.6}/StackGP/StackGP.py +128 -7
- {stackgp-0.0.4 → stackgp-0.0.6}/StackGP.egg-info/PKG-INFO +3 -2
- {stackgp-0.0.4 → stackgp-0.0.6}/pyproject.toml +1 -1
- {stackgp-0.0.4 → stackgp-0.0.6}/README.md +0 -0
- {stackgp-0.0.4 → stackgp-0.0.6}/StackGP/__init__.py +0 -0
- {stackgp-0.0.4 → stackgp-0.0.6}/StackGP.egg-info/SOURCES.txt +0 -0
- {stackgp-0.0.4 → stackgp-0.0.6}/StackGP.egg-info/dependency_links.txt +0 -0
- {stackgp-0.0.4 → stackgp-0.0.6}/StackGP.egg-info/requires.txt +0 -0
- {stackgp-0.0.4 → stackgp-0.0.6}/StackGP.egg-info/top_level.txt +0 -0
- {stackgp-0.0.4 → stackgp-0.0.6}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: StackGP
|
|
3
|
-
Version: 0.0.4
|
|
3
|
+
Version: 0.0.6
|
|
4
4
|
Summary: A stack-based genetic programming system
|
|
5
5
|
Author-email: Nathan Haut <hautnath@msu.edu>
|
|
6
6
|
Project-URL: Homepage, https://github.com/hoolagans/StackGP
|
|
@@ -17,6 +17,7 @@ Requires-Dist: matplotlib
|
|
|
17
17
|
Requires-Dist: scipy
|
|
18
18
|
Requires-Dist: scikit-learn
|
|
19
19
|
Requires-Dist: sympy
|
|
20
|
+
Dynamic: license-file
|
|
20
21
|
|
|
21
22
|
# StackGP
|
|
22
23
|
A stack-based genetic programming system in Python
|
|
@@ -18,7 +18,12 @@ import dill
|
|
|
18
18
|
import os
|
|
19
19
|
from sklearn.cluster import KMeans #for clustering in ensemble definition
|
|
20
20
|
from scipy.optimize import minimize #for uncertainty maximization
|
|
21
|
-
from sympy import symbols
|
|
21
|
+
from sympy import symbols, simplify, expand
|
|
22
|
+
import sympy as sym
|
|
23
|
+
|
|
24
|
+
import signal #for timing out functions
|
|
25
|
+
from contextlib import contextmanager #for timing out functions
|
|
26
|
+
|
|
22
27
|
warnings.filterwarnings('ignore', '.*invalid value.*' )
|
|
23
28
|
warnings.filterwarnings('ignore', '.*overflow.*' )
|
|
24
29
|
warnings.filterwarnings('ignore', '.*divide by.*' )
|
|
@@ -195,6 +200,10 @@ def evModHelper(varStack,opStack,tempStack,data): #Recursive helper function for
|
|
|
195
200
|
|
|
196
201
|
return [stack1,stack2,stack3]
|
|
197
202
|
evModHelper.__doc__ = "evModHelper(varStack,opStack,tempStack,data) is a helper function for evaluateGPModel"
|
|
203
|
+
def rmse(model, inputData, response):
    # Evaluate the model on the inputs, then measure how far its outputs fall from the targets.
    modelOutput = evaluateGPModel(model, inputData)
    residuals = modelOutput - response
    # Root of the mean squared residual.
    meanSquaredError = np.mean(residuals ** 2)
    return np.sqrt(meanSquaredError)
rmse.__doc__ = "rmse(model, input, response) is a fitness objective that evaluates the root mean squared error"
|
|
198
207
|
def fitness(prog,data,response): # Fitness function using correlation
|
|
199
208
|
predicted=evaluateGPModel(prog,np.array(data))
|
|
200
209
|
if type(predicted)!=list and type(predicted)!=np.ndarray:
|
|
@@ -220,8 +229,111 @@ fitness.__doc__ = "fitness(program,data,response) returns the 1-R^2 value of a m
|
|
|
220
229
|
def stackGPModelComplexity(model,*args):
    # model[0] and model[1] are the two stacks whose lengths are summed;
    # every "pop" entry in model[0] is excluded from the total.
    # Extra positional arguments are accepted and ignored.
    firstStack, secondStack = model[0], model[1]
    totalLength = len(firstStack) + len(secondStack)
    popCount = firstStack.tolist().count("pop")
    return totalLength - popCount
stackGPModelComplexity.__doc__ = "stackGPModelComplexity(model) returns the complexity of the model"
|
|
232
|
+
|
|
233
|
+
###################### Timeout function for model complexity ######################
|
|
234
|
+
class TimeoutException(Exception):
    """Raised inside a time_limit() block when the allotted time runs out."""


@contextmanager
def time_limit(seconds):
    """Context manager that raises TimeoutException if its body runs longer than `seconds`.

    Parameters:
        seconds: time budget for the body. Whole and fractional values are both
            accepted; 0 disables the timer, as signal.alarm(0) did.

    Raises:
        TimeoutException: inside the body, once the timer expires.

    The SIGALRM handler that was installed before entry is put back on exit, so
    using this helper no longer leaves its own handler registered process-wide.
    NOTE(review): relies on signal.SIGALRM, which the Python docs describe as
    Unix-only and main-thread-only -- confirm the supported platforms.
    """
    def signal_handler(signum, frame):
        raise TimeoutException("Timed out!")

    # signal.signal returns the handler it replaces; keep it so it can be restored.
    previous_handler = signal.signal(signal.SIGALRM, signal_handler)
    try:
        # setitimer takes fractional seconds, unlike signal.alarm (integers only).
        signal.setitimer(signal.ITIMER_REAL, seconds)
        yield
    finally:
        # Cancel any pending timer before handing the signal back.
        signal.setitimer(signal.ITIMER_REAL, 0)
        # previous_handler is None when the old handler was not installed from Python;
        # signal.signal cannot re-install that, so leave the current handler in place.
        if previous_handler is not None:
            signal.signal(signal.SIGALRM, previous_handler)
|
|
246
|
+
####################################################################################
|
|
247
|
+
|
|
248
|
+
# Compute Hess
|
|
249
|
+
def ComputeSymbolicHess(model,vars):
    """Return the symbolic Hessian of a model with respect to x0 .. x(vars-1).

    The model is rendered with printGPModel, simplified with sympy, and
    differentiated twice with respect to the symbols 'x0', 'x1', ...

    Parameters:
        model: model passed to printGPModel.
        vars: number of input variables (sets the Hessian to vars x vars).

    Returns:
        A sympy matrix holding the second partial derivatives.
    """
    expression = sym.simplify(printGPModel(model))
    if type(expression) == float:
        # NOTE(review): presumably guards constant models; confirm whether
        # sym.simplify can ever hand back a plain Python float here.
        zeroBlock = np.zeros((vars,vars))
        return sym.matrices.dense.MutableDenseMatrix(zeroBlock)
    variableSymbols = [symbols('x'+str(index)) for index in range(vars)]
    return sym.hessian(expression, variableSymbols)
|
|
255
|
+
|
|
256
|
+
def EvaluateHess(hess,vars,values):
    """Return the numerical rank of a symbolic Hessian evaluated at one point.

    Parameters:
        hess: symbolic matrix in the symbols 'x0' .. 'x(vars-1)'.
        vars: number of variables to substitute.
        values: indexable of coordinates; values[j] replaces symbol xj.

    Returns:
        The rank reported by np.linalg.matrix_rank with the fixed tolerance below.
    """
    substitutions = {}
    for index in range(vars):
        substitutions[symbols('x'+str(index))] = values[index]
    numericHess = np.array(hess.subs(substitutions)).astype(float)
    # Singular values at or below this tolerance are treated as zero.
    return np.linalg.matrix_rank(numericHess,tol=0.0001*0.0001*10)
|
|
261
|
+
|
|
262
|
+
def Approx2Deriv(model,values,diff1,diff2,positions): #maybe diff should be relative to the variation of each feature
    """Approximate a second partial derivative of the model by central differences.

    Parameters:
        model: model passed to evaluateGPModel.
        values: point at which the derivative is estimated.
        diff1: step applied to coordinate positions[0].
        diff2: step applied to coordinate positions[1].
        positions: pair of coordinate indices to differentiate with respect to.

    Returns:
        The difference quotient built from four model evaluations at the
        (+,+), (-,+), (+,-) and (-,-) shifted points.
    """
    def nudge(point, slot, step, subtract=False):
        # Copy `point`, moving only the coordinate selected by positions[slot].
        return [(point[k]-step if subtract else point[k]+step) if k == positions[slot] else point[k]
                for k in range(len(point))]

    plusPlus = nudge(nudge(values, 0, diff1), 1, diff2)
    minusPlus = nudge(nudge(values, 0, diff1, subtract=True), 1, diff2)
    plusMinus = nudge(nudge(values, 0, diff1), 1, diff2, subtract=True)
    minusMinus = nudge(nudge(values, 0, diff1, subtract=True), 1, diff2, subtract=True)

    # First differences along positions[0], taken at +diff2 and at -diff2.
    upperSlope = (evaluateGPModel(model,plusPlus)-evaluateGPModel(model,minusPlus))/((2*diff1))
    lowerSlope = (evaluateGPModel(model,plusMinus)-evaluateGPModel(model,minusMinus))/((2*diff1))
    # Difference of those slopes along positions[1].
    return (upperSlope-lowerSlope)/(2*diff2)
|
|
273
|
+
|
|
274
|
+
def ApproxHessRank(model,vars,values,diff1=0.001,diff2=0.001):
    """Return the numerical rank of a finite-difference Hessian of the model.

    Parameters:
        model: model passed through to Approx2Deriv.
        vars: number of variables; the Hessian is vars x vars.
        values: point at which the Hessian is approximated.
        diff1, diff2: finite-difference steps forwarded to Approx2Deriv.

    Returns:
        The rank reported by np.linalg.matrix_rank with the fixed tolerance below.
    """
    rows = []
    for j in range(vars):
        row = []
        for i in range(vars):
            row.append(Approx2Deriv(model,values,diff1,diff2,[i,j]))
        rows.append(row)
    numericHess = np.array(rows).astype(float)
    # Singular values at or below this tolerance are treated as zero.
    return np.linalg.matrix_rank(numericHess,tol=0.0001*0.0001*10)
|
|
279
|
+
|
|
280
|
+
#def HessRank(model,vars,values):
|
|
281
|
+
# try:
|
|
282
|
+
# with time_limit(.01):
|
|
283
|
+
# hess=ComputeSymbolicHess(model,vars)
|
|
284
|
+
# hess = EvaluateHess(hess,vars,values)
|
|
285
|
+
# #print(hess)
|
|
286
|
+
# return hess
|
|
287
|
+
# except TimeoutException as e:
|
|
288
|
+
# hess=ApproxHessRank(model,vars,values)
|
|
289
|
+
#print(hess)
|
|
290
|
+
# return hess
|
|
291
|
+
|
|
292
|
+
def HessRank(model,vars,values):
    """Return the Hessian rank of the model at `values`.

    Delegates directly to ApproxHessRank with its default step sizes.
    """
    return ApproxHessRank(model,vars,values)
|
|
295
|
+
|
|
296
|
+
|
|
297
|
+
|
|
298
|
+
|
|
299
|
+
|
|
300
|
+
# Counts basis terms in a model
|
|
301
|
+
def count_basis_terms(equation, expand=False):
    """Count the additive terms of a symbolic equation.

    Parameters:
        equation: expression providing as_ordered_terms(); with expand=True it
            is first passed through sympy's simplify and expand.
        expand: when True, simplify and expand the equation before splitting it
            into terms; when False, split the equation as given.

    Returns:
        The number of ordered terms, or 1000 if the work exceeds the 2 second
        time limit.
    """
    try:
        with time_limit(2):
            if expand:
                # The `expand` parameter shadows sympy's expand() inside this
                # function, so the sympy routines are reached through the `sym`
                # module alias instead of the bare names.
                expression = sym.expand(sym.simplify(equation))
            else:
                expression = equation
            # Separate the additive terms of the expression.
            terms = expression.as_ordered_terms()
    except TimeoutException:
        return 1000
    return len(terms)
|
|
321
|
+
|
|
322
|
+
# Complexity metric: the rank of the model's Hessian at `values`, as returned by HessRank.
# (An earlier variant counted basis terms: count_basis_terms(printGPModel(model)).)
def basisFunctionComplexity(model,vars, values,*args):
    """Return HessRank(model, vars, values), or 1000 if that computation fails.

    Parameters:
        model: model forwarded to HessRank.
        vars: number of input variables.
        values: point at which the Hessian rank is evaluated.
        *args: accepted and ignored.

    Returns:
        The Hessian rank, or the fallback value 1000 when HessRank raises.
    """
    try: # values should be max, min, and median with respect to response variable
        return HessRank(model,vars,values)
    except Exception:
        # Best-effort fallback for models that cannot be evaluated. Exception is
        # caught (not a bare except) so KeyboardInterrupt/SystemExit still propagate.
        return 1000
|
|
328
|
+
|
|
329
|
+
# Creates a lambda function to be used as a complexity metric when given a target dimensionality and deviation
|
|
330
|
+
def basisFunctionComplexityDiff(target, deviation, vars, low, mid, high):
    """Build a complexity metric measuring distance from a target complexity.

    The returned callable takes a model (extra arguments are ignored),
    averages |basisFunctionComplexity(model, vars, point) - target| over the
    three points low, mid and high, and returns how far that average exceeds
    `deviation` (0 when it is within `deviation`).
    """
    referencePoints = (low, mid, high)
    return lambda model,*args: max(
        np.mean([abs(basisFunctionComplexity(model,vars,point)-target) for point in referencePoints]),
        deviation,
    )-deviation
|
|
332
|
+
|
|
333
|
+
|
|
223
334
|
def setModelQuality(model,inputData,response,modelEvaluationMetrics=[fitness,stackGPModelComplexity]):
    # Score the model with every metric, in order, and store the scores in model[2].
    # The scores are collected first so model[2] is only replaced once all metrics have run.
    scores = []
    for metric in modelEvaluationMetrics:
        scores.append(metric(model,inputData,response))
    model[2]=scores

setModelQuality.__doc__ = "setModelQuality(model, inputdata, response, metrics=[r2,size]) is an inplace operator that sets a models quality"
|
|
227
339
|
def stackPass(model,pt):
|
|
@@ -493,7 +605,7 @@ def alignGPModel(model, data, response): #Aligns a model
|
|
|
493
605
|
if np.isnan(np.array(prediction)).any() or np.isnan(np.array(response)).any() or not np.isfinite(np.array(prediction,dtype=np.float32)).all():
|
|
494
606
|
return model
|
|
495
607
|
try:
|
|
496
|
-
align=np.round(np.polyfit(prediction,response,1,rcond=1e-16),decimals=14)
|
|
608
|
+
align=np.polyfit(prediction,response,1,rcond=1e-16)#np.round(np.polyfit(prediction,response,1,rcond=1e-16),decimals=14)
|
|
497
609
|
except np.linalg.LinAlgError:
|
|
498
610
|
#print("Alignment failed for: ", model, " with prediction: ", prediction, "and reference data: ", response)
|
|
499
611
|
return model
|
|
@@ -503,8 +615,10 @@ def alignGPModel(model, data, response): #Aligns a model
|
|
|
503
615
|
setModelQuality(newModel,data,response)
|
|
504
616
|
return newModel
|
|
505
617
|
alignGPModel.__doc__ = "alignGPModel(model, input, response) aligns a model such that response-a*f(x)+b are minimized over a and b"
|
|
506
|
-
def evolve(inputData, responseData, generations=100, ops=defaultOps(), const=defaultConst(), variableNames=[], mutationRate=79, crossoverRate=11, spawnRate=10, extinction=False,extinctionRate=10,elitismRate=50,popSize=300,maxComplexity=100,align=True,initialPop=[],timeLimit=300,capTime=False,tourneySize=5,tracking=False,modelEvaluationMetrics=[fitness,stackGPModelComplexity],dataSubsample=False,samplingMethod=randomSubsample):
|
|
618
|
+
def evolve(inputData, responseData, generations=100, ops=defaultOps(), const=defaultConst(), variableNames=[], mutationRate=79, crossoverRate=11, spawnRate=10, extinction=False,extinctionRate=10,elitismRate=50,popSize=300,maxComplexity=100,align=True,initialPop=[],timeLimit=300,capTime=False,tourneySize=5,tracking=False,modelEvaluationMetrics=[fitness,stackGPModelComplexity],dataSubsample=False,samplingMethod=randomSubsample,alternateObjectives=[],alternateObjFrequency=10,allowEarlyTermination=False,earlyTerminationThreshold=0):
|
|
507
619
|
|
|
620
|
+
metrics=modelEvaluationMetrics
|
|
621
|
+
|
|
508
622
|
fullInput,fullResponse=copy.deepcopy(inputData),copy.deepcopy(responseData)
|
|
509
623
|
inData=copy.deepcopy(fullInput)
|
|
510
624
|
resData=copy.deepcopy(fullResponse)
|
|
@@ -516,11 +630,18 @@ def evolve(inputData, responseData, generations=100, ops=defaultOps(), const=def
|
|
|
516
630
|
for i in range(generations):
|
|
517
631
|
if capTime and time.perf_counter()-startTime>timeLimit:
|
|
518
632
|
break
|
|
633
|
+
if len(alternateObjectives)>0 and (i+1)%alternateObjFrequency==0:
|
|
634
|
+
metrics=modelEvaluationMetrics[:1]+alternateObjectives
|
|
635
|
+
else:
|
|
636
|
+
metrics=modelEvaluationMetrics
|
|
519
637
|
if dataSubsample:
|
|
520
638
|
inData,resData=samplingMethod(fullInput,fullResponse)
|
|
521
639
|
for mods in models:
|
|
522
|
-
setModelQuality(mods,inData,resData,modelEvaluationMetrics=
|
|
640
|
+
setModelQuality(mods,inData,resData,modelEvaluationMetrics=metrics)
|
|
523
641
|
models=removeIndeterminateModels(models)
|
|
642
|
+
if allowEarlyTermination and min([mods[2][0] for mods in models])<=earlyTerminationThreshold:
|
|
643
|
+
print("Early termination at generation ", i)
|
|
644
|
+
break
|
|
524
645
|
if tracking:
|
|
525
646
|
bestFits.append(min([mods[2][0] for mods in paretoTournament(models)]))
|
|
526
647
|
|
|
@@ -529,7 +650,7 @@ def evolve(inputData, responseData, generations=100, ops=defaultOps(), const=def
|
|
|
529
650
|
if extinction and i%extinctionRate:
|
|
530
651
|
models=initializeGPModels(variableCount,ops,const,popSize)
|
|
531
652
|
for mods in models:
|
|
532
|
-
setModelQuality(mods,inData,resData,modelEvaluationMetrics=
|
|
653
|
+
setModelQuality(mods,inData,resData,modelEvaluationMetrics=metrics)
|
|
533
654
|
|
|
534
655
|
models=tournamentModelSelection(models,popSize,tourneySize)
|
|
535
656
|
|
|
@@ -560,7 +681,7 @@ def evolve(inputData, responseData, generations=100, ops=defaultOps(), const=def
|
|
|
560
681
|
|
|
561
682
|
|
|
562
683
|
for mods in models:
|
|
563
|
-
setModelQuality(mods,fullInput,fullResponse,modelEvaluationMetrics=modelEvaluationMetrics)
|
|
684
|
+
setModelQuality(mods,fullInput,fullResponse,modelEvaluationMetrics=modelEvaluationMetrics+alternateObjectives)
|
|
564
685
|
models=[trimModel(mod) for mod in models]
|
|
565
686
|
models=deleteDuplicateModels(models)
|
|
566
687
|
models=removeIndeterminateModels(models)
|
|
@@ -995,4 +1116,4 @@ def runEpochs(x,y,epochs=5,**kwargs):
|
|
|
995
1116
|
for i in range(epochs):
|
|
996
1117
|
models+=evolve(x,y,**kwargs)
|
|
997
1118
|
|
|
998
|
-
return sortModels(models)
|
|
1119
|
+
return sortModels(models)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: StackGP
|
|
3
|
-
Version: 0.0.4
|
|
3
|
+
Version: 0.0.6
|
|
4
4
|
Summary: A stack-based genetic programming system
|
|
5
5
|
Author-email: Nathan Haut <hautnath@msu.edu>
|
|
6
6
|
Project-URL: Homepage, https://github.com/hoolagans/StackGP
|
|
@@ -17,6 +17,7 @@ Requires-Dist: matplotlib
|
|
|
17
17
|
Requires-Dist: scipy
|
|
18
18
|
Requires-Dist: scikit-learn
|
|
19
19
|
Requires-Dist: sympy
|
|
20
|
+
Dynamic: license-file
|
|
20
21
|
|
|
21
22
|
# StackGP
|
|
22
23
|
A stack-based genetic programming system in Python
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|