StackGP 0.0.12__tar.gz → 0.0.14__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {stackgp-0.0.12 → stackgp-0.0.14}/PKG-INFO +1 -1
- {stackgp-0.0.12 → stackgp-0.0.14}/StackGP/StackGP.py +84 -8
- {stackgp-0.0.12 → stackgp-0.0.14}/StackGP.egg-info/PKG-INFO +1 -1
- {stackgp-0.0.12 → stackgp-0.0.14}/pyproject.toml +1 -1
- {stackgp-0.0.12 → stackgp-0.0.14}/LICENSE +0 -0
- {stackgp-0.0.12 → stackgp-0.0.14}/README.md +0 -0
- {stackgp-0.0.12 → stackgp-0.0.14}/StackGP/__init__.py +0 -0
- {stackgp-0.0.12 → stackgp-0.0.14}/StackGP.egg-info/SOURCES.txt +0 -0
- {stackgp-0.0.12 → stackgp-0.0.14}/StackGP.egg-info/dependency_links.txt +0 -0
- {stackgp-0.0.12 → stackgp-0.0.14}/StackGP.egg-info/requires.txt +0 -0
- {stackgp-0.0.12 → stackgp-0.0.14}/StackGP.egg-info/top_level.txt +0 -0
- {stackgp-0.0.12 → stackgp-0.0.14}/setup.cfg +0 -0
|
@@ -20,8 +20,10 @@ from sklearn.cluster import KMeans #for clustering in ensemble definition
|
|
|
20
20
|
from scipy.optimize import minimize #for uncertainty maximization
|
|
21
21
|
from sympy import symbols, simplify, expand
|
|
22
22
|
import sympy as sym
|
|
23
|
-
|
|
24
|
-
|
|
23
|
+
try:
|
|
24
|
+
from IPython.display import display, clear_output
|
|
25
|
+
except:
|
|
26
|
+
pass
|
|
25
27
|
import signal #for timing out functions
|
|
26
28
|
from contextlib import contextmanager #for timing out functions
|
|
27
29
|
|
|
@@ -50,6 +52,10 @@ def exp(a):
|
|
|
50
52
|
# def sine(a,b):
|
|
51
53
|
# return np.sin(a)
|
|
52
54
|
def power(a,b):
    """Protected power operator: a**b with zero bases poisoned to NaN.

    A zero base is replaced by NaN (instead of risking 0**negative -> inf or
    ZeroDivisionError) so that models using it score as indeterminate and get
    culled, rather than crashing evaluation.
    """
    # Scalar base of exactly 0: propagate NaN (0/nan == nan).
    if (type(a)==int or type(a)==float or type(a)==np.float64) and a==0:
        return a/math.nan
    # Array base containing 0: substitute NaN at the zero entries, then
    # exponentiate.  BUGFIX: the released code returned a/np.where(a==0,nan,a),
    # which is 1.0 (not a**b) at every non-zero entry.
    if (type(a)==np.ndarray) and (0 in a):
        return np.where(a==0,math.nan,a)**b
    return a**b
|
|
54
60
|
def sqrt(a):
|
|
55
61
|
return np.sqrt(a)
|
|
@@ -109,7 +115,7 @@ def ranReal(a=20,b=-10):
|
|
|
109
115
|
############################
|
|
110
116
|
#Data Subsampling Methods
|
|
111
117
|
############################
|
|
112
|
-
def randomSubsample(x,y):
|
|
118
|
+
def randomSubsample(x,y, *args, **kwargs):
    """Return a uniformly random subsample of (x, y).

    The sample size is ceil(len(y)**(3/5)), floored at 3 points.  Extra
    positional/keyword arguments are accepted and ignored so this method is
    call-compatible with the generation-aware sampling methods.
    """
    sampleSize = max(int(np.ceil(len(y) ** (3 / 5))), 3)
    chosen = np.random.choice(range(x.shape[1]), sampleSize, replace=False)
    subsampledX = np.array([row[chosen] for row in x])
    return subsampledX, y[chosen]
|
|
@@ -119,6 +125,46 @@ def generationProportionalSample(x,y,generation=100,generations=100):
|
|
|
119
125
|
idx=np.random.choice(range(x.shape[1]),n,replace=False)
|
|
120
126
|
return np.array([i[idx] for i in x]),y[idx]
|
|
121
127
|
|
|
128
|
+
def ordinalSample(x,y,generation=100,generations=100):
    """Subsample points spread evenly across the sorted response values.

    The sample size grows with generation/generations (floored at 3), and the
    picked indices step through argsort(y) at a fixed stride, so the sample
    spans the full range of response values.
    """
    count = max(int(len(y) * generation / generations), 3)
    rankOrder = np.argsort(y)
    stride = len(y) / (count - 1)
    picks = [rankOrder[max(int(k * stride) - 1, 0)] for k in range(count)]
    subsampledX = np.array([row[picks] for row in x])
    return subsampledX, y[picks]
|
|
134
|
+
|
|
135
|
+
def orderedSample(x,y,generation=100,generations=100):
    """Take the first k samples in storage order.

    k scales with generation/generations (floored at 3 points); no shuffling
    is done, so the sample is simply a prefix of the data.
    """
    count = max(int(len(y) * generation / generations), 3)
    picks = list(range(count))
    subsampledX = np.array([row[picks] for row in x])
    return subsampledX, y[picks]
|
|
139
|
+
|
|
140
|
+
def ordinalBalancedSample(x,y,generation=100,generations=100):
    """Subsample roughly evenly across equal-width bins of the response.

    The sample size scales with generation/generations (floored at 3); the
    response range is split into ~sqrt(n) bins and an equal number of points
    is drawn at random (without replacement) from each non-empty bin.
    """
    n=max(int(len(y)*generation/generations),3)
    numBins=int(max(np.ceil(np.sqrt(n)),3))
    bins=np.linspace(min(y),max(y),numBins+1)
    # BUGFIX: np.digitize maps y==max(y) past the last edge (index numBins),
    # outside range(numBins), so the maximum response value was unreachable
    # by every bin; clip it into the final bin.
    binIdx=np.clip(np.digitize(y,bins)-1,0,numBins-1)
    samplesPerBin=max(int(n/numBins),1)
    idx=[]
    for i in range(numBins):
        binMembers=[j for j in range(len(y)) if binIdx[j]==i]
        if len(binMembers)>0:
            chosen=np.random.choice(binMembers,min(samplesPerBin,len(binMembers)),replace=False)
            idx=idx+chosen.tolist()
    return np.array([i[idx] for i in x]),y[idx]
|
|
153
|
+
|
|
154
|
+
def balancedSample(x,y, *args, **kwargs):
    """Subsample spread over equal-width bins of the response values.

    Sample size is ceil(len(y)**(3/5)); the response range is split into
    max(n,3) bins and each non-empty bin contributes the same number of
    randomly chosen points.  Extra args/kwargs are accepted and ignored for
    call-compatibility with the generation-aware sampling methods.
    """
    n=int(np.ceil(len(y)**(3/5)))
    numBins=max(n,3)
    bins=np.linspace(min(y),max(y),numBins+1)
    # BUGFIX: np.digitize maps y==max(y) past the last edge (index numBins),
    # outside range(numBins), so the maximum response value could never be
    # sampled; clip it into the final bin.
    binIdx=np.clip(np.digitize(y,bins)-1,0,numBins-1)
    samplesPerBin=max(int(n/numBins),1)
    idx=[]
    for i in range(numBins):
        binMembers=[j for j in range(len(y)) if binIdx[j]==i]
        if len(binMembers)>0:
            chosen=np.random.choice(binMembers,min(samplesPerBin,len(binMembers)),replace=False)
            idx=idx+chosen.tolist()
    return np.array([i[idx] for i in x]),y[idx]
|
|
167
|
+
|
|
122
168
|
import inspect
|
|
123
169
|
def getArity(func): #Returns the arity of a function: used for model evaluations
|
|
124
170
|
if func=="pop":
|
|
@@ -223,6 +269,8 @@ def evModHelper(varStack,opStack,tempStack,data): #Recursive helper function for
|
|
|
223
269
|
evModHelper.__doc__ = "evModHelper(varStack,opStack,tempStack,data) is a helper function for evaluateGPModel"
|
|
224
270
|
def rmse(model, inputData, response):
    """Root-mean-squared-error fitness objective.

    Returns np.nan when the model produces any non-finite or complex
    prediction, so such models can be culled rather than ranked.
    """
    predictions = evaluateGPModel(model, inputData)
    # np.all/np.any handle scalar (0-d) predictions too; the builtin
    # all()/any() raise TypeError on a non-iterable numpy scalar.
    if not np.all(np.isfinite(predictions)) or np.any(np.iscomplex(predictions)):
        return np.nan
    return np.sqrt(np.mean((predictions - response) ** 2))
|
|
227
275
|
rmse.__doc__ = "rmse(model, input, response) is a fitness objective that evaluates the root mean squared error"
|
|
228
276
|
def binaryError(model, input, response):
|
|
@@ -576,16 +624,18 @@ removeIndeterminateModels.__doc__ = "removeIndeterminateModels(models) removes m
|
|
|
576
624
|
def sortModels(models):
    """Sort a model population by its accuracy entry (index 2), best first."""
    def accuracyOf(model):
        return model[2]
    return sorted(models, key=accuracyOf)
|
|
578
626
|
sortModels.__doc__ = "sortModels(models) sorts a model population by the models' accuracies"
|
|
579
|
-
def selectModels(models, selectionSize=0.5):
|
|
627
|
+
def selectModels(models, selectionSize=0.5, thresholds=None):
|
|
580
628
|
tMods=copy.deepcopy(models)
|
|
581
629
|
[modelToListForm(mod) for mod in tMods]
|
|
630
|
+
if thresholds is not None:
|
|
631
|
+
tMods=[mod for mod in tMods if all([mod[2][i]<=thresholds[i] for i in range(len(thresholds))])]
|
|
582
632
|
paretoModels=[]
|
|
583
633
|
if selectionSize<=1:
|
|
584
634
|
selection=selectionSize*len(models)
|
|
585
635
|
else:
|
|
586
636
|
selection=selectionSize
|
|
587
637
|
|
|
588
|
-
while len(paretoModels)<selection:
|
|
638
|
+
while len(paretoModels)<selection and len(tMods)>0:
|
|
589
639
|
front=paretoTournament(tMods)
|
|
590
640
|
paretoModels=paretoModels+front
|
|
591
641
|
for i in front:
|
|
@@ -665,7 +715,7 @@ def evolve(inputData, responseData, generations=100, ops=defaultOps(), const=def
|
|
|
665
715
|
else:
|
|
666
716
|
metrics=modelEvaluationMetrics
|
|
667
717
|
if dataSubsample:
|
|
668
|
-
inData,resData=samplingMethod(fullInput,fullResponse)
|
|
718
|
+
inData,resData=samplingMethod(fullInput,fullResponse,generations=generations,generation=i)
|
|
669
719
|
for mods in models:
|
|
670
720
|
setModelQuality(mods,inData,resData,modelEvaluationMetrics=metrics)
|
|
671
721
|
models=removeIndeterminateModels(models)
|
|
@@ -944,7 +994,7 @@ def activeLearning(func, dims, ranges,rangesP,eqNum=1,version=1,iterations=100):
|
|
|
944
994
|
file.close()
|
|
945
995
|
return -1
|
|
946
996
|
|
|
947
|
-
def plotModels(models):
|
|
997
|
+
def plotModels(models, modelExpression=False):
|
|
948
998
|
tMods=copy.deepcopy(models)
|
|
949
999
|
[modelToListForm(mod) for mod in tMods]
|
|
950
1000
|
paretoModels=paretoTournament(tMods)
|
|
@@ -965,7 +1015,11 @@ def plotModels(models):
|
|
|
965
1015
|
sc=plt.scatter(complexities,accuracies,color=colors)
|
|
966
1016
|
plt.xlabel("Complexity")
|
|
967
1017
|
plt.ylabel("1-R**2")
|
|
968
|
-
|
|
1018
|
+
|
|
1019
|
+
if modelExpression:
|
|
1020
|
+
names=[str(printGPModel(mod)) for mod in tMods]+[str(printGPModel(mod)) for mod in paretoModels]
|
|
1021
|
+
else:
|
|
1022
|
+
names = [str(mod) for mod in tMods]+[str(mod) for mod in paretoModels]
|
|
969
1023
|
|
|
970
1024
|
label = ax.annotate("", xy=(0,0), xytext=(np.min(complexities),np.mean([np.max(accuracies),np.min(accuracies)])),
|
|
971
1025
|
bbox=dict(boxstyle="round", fc="w"),
|
|
@@ -1169,3 +1223,25 @@ def runEpochs(x,y,epochs=5,**kwargs):
|
|
|
1169
1223
|
models+=evolve(x,y,**kwargs)
|
|
1170
1224
|
|
|
1171
1225
|
return sortModels(models)
|
|
1226
|
+
|
|
1227
|
+
|
|
1228
|
+
############################
|
|
1229
|
+
#Benchmarking
|
|
1230
|
+
############################
|
|
1231
|
+
def generateRandomBenchmark(numVars=5, numSamples=100, noiseLevel=0, opsChoices=None, constChoices=None, maxLength=10):
    """Generate a random symbolic-regression benchmark.

    Returns (inputData, responseData, randomModel): random inputs, the target
    model's outputs over them (optionally with Gaussian noise), and the model
    itself as ground truth.
    """
    # Resolve defaults at call time: evaluating defaultOps()/defaultConst()
    # in the signature would freeze one shared object at import time
    # (mutable-default pitfall).
    if opsChoices is None:
        opsChoices = defaultOps()
    if constChoices is None:
        constChoices = defaultConst()

    # Random input matrix: one row per variable.
    inputData = np.random.rand(numVars, numSamples)

    # Random target model over those variables.
    randomModel = generateRandomModel(numVars, opsChoices, constChoices, maxLength)

    # Ground-truth response from the model.
    responseData = evaluateGPModel(randomModel, inputData)

    # Optional additive Gaussian noise (non-mutating, in case the model
    # evaluation returns a shared array).
    if noiseLevel > 0:
        responseData = responseData + np.random.normal(0, noiseLevel, size=responseData.shape)

    return inputData, responseData, randomModel
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|