StackGP 0.0.12__tar.gz → 0.0.14__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {stackgp-0.0.12 → stackgp-0.0.14}/PKG-INFO +1 -1
- {stackgp-0.0.12 → stackgp-0.0.14}/StackGP/StackGP.py +84 -8
- {stackgp-0.0.12 → stackgp-0.0.14}/StackGP.egg-info/PKG-INFO +1 -1
- {stackgp-0.0.12 → stackgp-0.0.14}/pyproject.toml +1 -1
- {stackgp-0.0.12 → stackgp-0.0.14}/LICENSE +0 -0
- {stackgp-0.0.12 → stackgp-0.0.14}/README.md +0 -0
- {stackgp-0.0.12 → stackgp-0.0.14}/StackGP/__init__.py +0 -0
- {stackgp-0.0.12 → stackgp-0.0.14}/StackGP.egg-info/SOURCES.txt +0 -0
- {stackgp-0.0.12 → stackgp-0.0.14}/StackGP.egg-info/dependency_links.txt +0 -0
- {stackgp-0.0.12 → stackgp-0.0.14}/StackGP.egg-info/requires.txt +0 -0
- {stackgp-0.0.12 → stackgp-0.0.14}/StackGP.egg-info/top_level.txt +0 -0
- {stackgp-0.0.12 → stackgp-0.0.14}/setup.cfg +0 -0
|
@@ -20,8 +20,10 @@ from sklearn.cluster import KMeans #for clustering in ensemble definition
|
|
|
20
20
|
from scipy.optimize import minimize #for uncertainty maximization
|
|
21
21
|
from sympy import symbols, simplify, expand
|
|
22
22
|
import sympy as sym
|
|
23
|
-
|
|
24
|
-
|
|
23
|
+
try:
|
|
24
|
+
from IPython.display import display, clear_output
|
|
25
|
+
except:
|
|
26
|
+
pass
|
|
25
27
|
import signal #for timing out functions
|
|
26
28
|
from contextlib import contextmanager #for timing out functions
|
|
27
29
|
|
|
@@ -50,6 +52,10 @@ def exp(a):
|
|
|
50
52
|
# def sine(a,b):
|
|
51
53
|
# return np.sin(a)
|
|
52
54
|
def power(a,b):
    """Protected power operator: a**b with zero bases poisoned to NaN.

    A zero base is replaced by NaN (instead of risking 0**negative -> inf or
    ZeroDivisionError) so that models using it score as indeterminate and get
    culled, rather than crashing evaluation.
    """
    # Scalar base of exactly 0: propagate NaN (0/nan == nan).
    if (type(a)==int or type(a)==float or type(a)==np.float64) and a==0:
        return a/math.nan
    # Array base containing 0: substitute NaN at the zero entries, then
    # exponentiate.  BUGFIX: the released code returned a/np.where(a==0,nan,a),
    # which is 1.0 (not a**b) at every non-zero entry.
    if (type(a)==np.ndarray) and (0 in a):
        return np.where(a==0,math.nan,a)**b
    return a**b
|
|
54
60
|
def sqrt(a):
|
|
55
61
|
return np.sqrt(a)
|
|
@@ -109,7 +115,7 @@ def ranReal(a=20,b=-10):
|
|
|
109
115
|
############################
|
|
110
116
|
#Data Subsampling Methods
|
|
111
117
|
############################
|
|
112
|
-
def randomSubsample(x,y):
|
|
118
|
+
def randomSubsample(x,y, *args, **kwargs):
    """Return a uniformly random subsample of (x, y).

    The sample size is ceil(len(y)**(3/5)), floored at 3 points.  Extra
    positional/keyword arguments are accepted and ignored so this method is
    call-compatible with the generation-aware sampling methods.
    """
    sampleSize = max(int(np.ceil(len(y) ** (3 / 5))), 3)
    chosen = np.random.choice(range(x.shape[1]), sampleSize, replace=False)
    subsampledX = np.array([row[chosen] for row in x])
    return subsampledX, y[chosen]
|
|
@@ -119,6 +125,46 @@ def generationProportionalSample(x,y,generation=100,generations=100):
|
|
|
119
125
|
idx=np.random.choice(range(x.shape[1]),n,replace=False)
|
|
120
126
|
return np.array([i[idx] for i in x]),y[idx]
|
|
121
127
|
|
|
128
|
+
def ordinalSample(x,y,generation=100,generations=100):
    """Subsample points spread evenly across the sorted response values.

    The sample size grows with generation/generations (floored at 3), and the
    picked indices step through argsort(y) at a fixed stride, so the sample
    spans the full range of response values.
    """
    count = max(int(len(y) * generation / generations), 3)
    rankOrder = np.argsort(y)
    stride = len(y) / (count - 1)
    picks = [rankOrder[max(int(k * stride) - 1, 0)] for k in range(count)]
    subsampledX = np.array([row[picks] for row in x])
    return subsampledX, y[picks]
|
|
134
|
+
|
|
135
|
+
def orderedSample(x,y,generation=100,generations=100):
    """Take the first k samples in storage order.

    k scales with generation/generations (floored at 3 points); no shuffling
    is done, so the sample is simply a prefix of the data.
    """
    count = max(int(len(y) * generation / generations), 3)
    picks = list(range(count))
    subsampledX = np.array([row[picks] for row in x])
    return subsampledX, y[picks]
|
|
139
|
+
|
|
140
|
+
def ordinalBalancedSample(x,y,generation=100,generations=100):
    """Subsample roughly evenly across equal-width bins of the response.

    The sample size scales with generation/generations (floored at 3); the
    response range is split into ~sqrt(n) bins and an equal number of points
    is drawn at random (without replacement) from each non-empty bin.
    """
    n=max(int(len(y)*generation/generations),3)
    numBins=int(max(np.ceil(np.sqrt(n)),3))
    bins=np.linspace(min(y),max(y),numBins+1)
    # BUGFIX: np.digitize maps y==max(y) past the last edge (index numBins),
    # outside range(numBins), so the maximum response value was unreachable
    # by every bin; clip it into the final bin.
    binIdx=np.clip(np.digitize(y,bins)-1,0,numBins-1)
    samplesPerBin=max(int(n/numBins),1)
    idx=[]
    for i in range(numBins):
        binMembers=[j for j in range(len(y)) if binIdx[j]==i]
        if len(binMembers)>0:
            chosen=np.random.choice(binMembers,min(samplesPerBin,len(binMembers)),replace=False)
            idx=idx+chosen.tolist()
    return np.array([i[idx] for i in x]),y[idx]
|
|
153
|
+
|
|
154
|
+
def balancedSample(x,y, *args, **kwargs):
    """Subsample spread over equal-width bins of the response values.

    Sample size is ceil(len(y)**(3/5)); the response range is split into
    max(n,3) bins and each non-empty bin contributes the same number of
    randomly chosen points.  Extra args/kwargs are accepted and ignored for
    call-compatibility with the generation-aware sampling methods.
    """
    n=int(np.ceil(len(y)**(3/5)))
    numBins=max(n,3)
    bins=np.linspace(min(y),max(y),numBins+1)
    # BUGFIX: np.digitize maps y==max(y) past the last edge (index numBins),
    # outside range(numBins), so the maximum response value could never be
    # sampled; clip it into the final bin.
    binIdx=np.clip(np.digitize(y,bins)-1,0,numBins-1)
    samplesPerBin=max(int(n/numBins),1)
    idx=[]
    for i in range(numBins):
        binMembers=[j for j in range(len(y)) if binIdx[j]==i]
        if len(binMembers)>0:
            chosen=np.random.choice(binMembers,min(samplesPerBin,len(binMembers)),replace=False)
            idx=idx+chosen.tolist()
    return np.array([i[idx] for i in x]),y[idx]
|
|
167
|
+
|
|
122
168
|
import inspect
|
|
123
169
|
def getArity(func): #Returns the arity of a function: used for model evaluations
|
|
124
170
|
if func=="pop":
|
|
@@ -223,6 +269,8 @@ def evModHelper(varStack,opStack,tempStack,data): #Recursive helper function for
|
|
|
223
269
|
evModHelper.__doc__ = "evModHelper(varStack,opStack,tempStack,data) is a helper function for evaluateGPModel"
|
|
224
270
|
def rmse(model, inputData, response):
    """Root-mean-squared-error fitness objective.

    Returns np.nan when the model produces any non-finite or complex
    prediction, so such models can be culled rather than ranked.
    """
    predictions = evaluateGPModel(model, inputData)
    # np.all/np.any handle scalar (0-d) predictions too; the builtin
    # all()/any() raise TypeError on a non-iterable numpy scalar.
    if not np.all(np.isfinite(predictions)) or np.any(np.iscomplex(predictions)):
        return np.nan
    return np.sqrt(np.mean((predictions - response) ** 2))
|
|
227
275
|
rmse.__doc__ = "rmse(model, input, response) is a fitness objective that evaluates the root mean squared error"
|
|
228
276
|
def binaryError(model, input, response):
|
|
@@ -576,16 +624,18 @@ removeIndeterminateModels.__doc__ = "removeIndeterminateModels(models) removes m
|
|
|
576
624
|
def sortModels(models):
    """Sort a model population by its accuracy entry (index 2), best first."""
    def accuracyOf(model):
        return model[2]
    return sorted(models, key=accuracyOf)
|
|
578
626
|
sortModels.__doc__ = "sortModels(models) sorts a model population by the models' accuracies"
|
|
579
|
-
def selectModels(models, selectionSize=0.5):
|
|
627
|
+
def selectModels(models, selectionSize=0.5, thresholds=None):
|
|
580
628
|
tMods=copy.deepcopy(models)
|
|
581
629
|
[modelToListForm(mod) for mod in tMods]
|
|
630
|
+
if thresholds is not None:
|
|
631
|
+
tMods=[mod for mod in tMods if all([mod[2][i]<=thresholds[i] for i in range(len(thresholds))])]
|
|
582
632
|
paretoModels=[]
|
|
583
633
|
if selectionSize<=1:
|
|
584
634
|
selection=selectionSize*len(models)
|
|
585
635
|
else:
|
|
586
636
|
selection=selectionSize
|
|
587
637
|
|
|
588
|
-
while len(paretoModels)<selection:
|
|
638
|
+
while len(paretoModels)<selection and len(tMods)>0:
|
|
589
639
|
front=paretoTournament(tMods)
|
|
590
640
|
paretoModels=paretoModels+front
|
|
591
641
|
for i in front:
|
|
@@ -665,7 +715,7 @@ def evolve(inputData, responseData, generations=100, ops=defaultOps(), const=def
|
|
|
665
715
|
else:
|
|
666
716
|
metrics=modelEvaluationMetrics
|
|
667
717
|
if dataSubsample:
|
|
668
|
-
inData,resData=samplingMethod(fullInput,fullResponse)
|
|
718
|
+
inData,resData=samplingMethod(fullInput,fullResponse,generations=generations,generation=i)
|
|
669
719
|
for mods in models:
|
|
670
720
|
setModelQuality(mods,inData,resData,modelEvaluationMetrics=metrics)
|
|
671
721
|
models=removeIndeterminateModels(models)
|
|
@@ -944,7 +994,7 @@ def activeLearning(func, dims, ranges,rangesP,eqNum=1,version=1,iterations=100):
|
|
|
944
994
|
file.close()
|
|
945
995
|
return -1
|
|
946
996
|
|
|
947
|
-
def plotModels(models):
|
|
997
|
+
def plotModels(models, modelExpression=False):
|
|
948
998
|
tMods=copy.deepcopy(models)
|
|
949
999
|
[modelToListForm(mod) for mod in tMods]
|
|
950
1000
|
paretoModels=paretoTournament(tMods)
|
|
@@ -965,7 +1015,11 @@ def plotModels(models):
|
|
|
965
1015
|
sc=plt.scatter(complexities,accuracies,color=colors)
|
|
966
1016
|
plt.xlabel("Complexity")
|
|
967
1017
|
plt.ylabel("1-R**2")
|
|
968
|
-
|
|
1018
|
+
|
|
1019
|
+
if modelExpression:
|
|
1020
|
+
names=[str(printGPModel(mod)) for mod in tMods]+[str(printGPModel(mod)) for mod in paretoModels]
|
|
1021
|
+
else:
|
|
1022
|
+
names = [str(mod) for mod in tMods]+[str(mod) for mod in paretoModels]
|
|
969
1023
|
|
|
970
1024
|
label = ax.annotate("", xy=(0,0), xytext=(np.min(complexities),np.mean([np.max(accuracies),np.min(accuracies)])),
|
|
971
1025
|
bbox=dict(boxstyle="round", fc="w"),
|
|
@@ -1169,3 +1223,25 @@ def runEpochs(x,y,epochs=5,**kwargs):
|
|
|
1169
1223
|
models+=evolve(x,y,**kwargs)
|
|
1170
1224
|
|
|
1171
1225
|
return sortModels(models)
|
|
1226
|
+
|
|
1227
|
+
|
|
1228
|
+
############################
|
|
1229
|
+
#Benchmarking
|
|
1230
|
+
############################
|
|
1231
|
+
def generateRandomBenchmark(numVars=5, numSamples=100, noiseLevel=0, opsChoices=None, constChoices=None, maxLength=10):
    """Generate a random symbolic-regression benchmark.

    Returns (inputData, responseData, randomModel): random inputs, the target
    model's outputs over them (optionally with Gaussian noise), and the model
    itself as ground truth.
    """
    # Resolve defaults at call time: evaluating defaultOps()/defaultConst()
    # in the signature would freeze one shared object at import time
    # (mutable-default pitfall).
    if opsChoices is None:
        opsChoices = defaultOps()
    if constChoices is None:
        constChoices = defaultConst()

    # Random input matrix: one row per variable.
    inputData = np.random.rand(numVars, numSamples)

    # Random target model over those variables.
    randomModel = generateRandomModel(numVars, opsChoices, constChoices, maxLength)

    # Ground-truth response from the model.
    responseData = evaluateGPModel(randomModel, inputData)

    # Optional additive Gaussian noise (non-mutating, in case the model
    # evaluation returns a shared array).
    if noiseLevel > 0:
        responseData = responseData + np.random.normal(0, noiseLevel, size=responseData.shape)

    return inputData, responseData, randomModel
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|