StackGP 0.0.4__tar.gz → 0.0.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  MIT License
2
2
 
3
- Copyright (c) 2023 Nathan Haut
3
+ Copyright (c) 2025 Nathan Haut
4
4
 
5
5
  Permission is hereby granted, free of charge, to any person obtaining a copy
6
6
  of this software and associated documentation files (the "Software"), to deal
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.4
2
2
  Name: StackGP
3
- Version: 0.0.4
3
+ Version: 0.0.6
4
4
  Summary: A stack-based genetic programming system
5
5
  Author-email: Nathan Haut <hautnath@msu.edu>
6
6
  Project-URL: Homepage, https://github.com/hoolagans/StackGP
@@ -17,6 +17,7 @@ Requires-Dist: matplotlib
17
17
  Requires-Dist: scipy
18
18
  Requires-Dist: scikit-learn
19
19
  Requires-Dist: sympy
20
+ Dynamic: license-file
20
21
 
21
22
  # StackGP
22
23
  A stack-based genetic programming system in Python
@@ -18,7 +18,12 @@ import dill
18
18
  import os
19
19
  from sklearn.cluster import KMeans #for clustering in ensemble definition
20
20
  from scipy.optimize import minimize #for uncertainty maximization
21
- from sympy import symbols
21
+ from sympy import symbols, simplify, expand
22
+ import sympy as sym
23
+
24
+ import signal #for timing out functions
25
+ from contextlib import contextmanager #for timing out functions
26
+
22
27
  warnings.filterwarnings('ignore', '.*invalid value.*' )
23
28
  warnings.filterwarnings('ignore', '.*overflow.*' )
24
29
  warnings.filterwarnings('ignore', '.*divide by.*' )
@@ -195,6 +200,10 @@ def evModHelper(varStack,opStack,tempStack,data): #Recursive helper function for
195
200
 
196
201
  return [stack1,stack2,stack3]
197
202
  evModHelper.__doc__ = "evModHelper(varStack,opStack,tempStack,data) is a helper function for evaluateGPModel"
203
+ def rmse(model, inputData, response):
204
+ predictions = evaluateGPModel(model, inputData)
205
+ return np.sqrt(np.mean((predictions - response) ** 2))
206
+ rmse.__doc__ = "rmse(model, input, response) is a fitness objective that evaluates the root mean squared error"
198
207
  def fitness(prog,data,response): # Fitness function using correlation
199
208
  predicted=evaluateGPModel(prog,np.array(data))
200
209
  if type(predicted)!=list and type(predicted)!=np.ndarray:
@@ -220,8 +229,111 @@ fitness.__doc__ = "fitness(program,data,response) returns the 1-R^2 value of a m
220
229
  def stackGPModelComplexity(model,*args):
221
230
  return len(model[0])+len(model[1])-model[0].tolist().count("pop")
222
231
  stackGPModelComplexity.__doc__ = "stackGPModelComplexity(model) returns the complexity of the model"
232
+
233
+ ###################### Timeout function for model complexity ######################
234
+ class TimeoutException(Exception): pass
235
+
236
+ @contextmanager
237
+ def time_limit(seconds):
238
+ def signal_handler(signum, frame):
239
+ raise TimeoutException("Timed out!")
240
+ signal.signal(signal.SIGALRM, signal_handler)
241
+ signal.alarm(seconds)
242
+ try:
243
+ yield
244
+ finally:
245
+ signal.alarm(0)
246
+ ####################################################################################
247
+
248
+ # Compute Hess
249
+ def ComputeSymbolicHess(model,vars):
250
+ printedModel=sym.simplify(printGPModel(model))
251
+ if type(printedModel)==float:
252
+ return sym.matrices.dense.MutableDenseMatrix(np.zeros((vars,vars)))
253
+ hess=sym.hessian(printedModel, [symbols('x'+str(i)) for i in range(vars)])
254
+ return hess
255
+
256
+ def EvaluateHess(hess,vars,values):
257
+ numHess=hess.subs({symbols('x'+str(j)):values[j] for j in range(vars)})
258
+ hessN = np.array(numHess).astype(float)
259
+ rankN=np.linalg.matrix_rank(hessN,tol=0.0001*0.0001*10)
260
+ return rankN
261
+
262
+ def Approx2Deriv(model,values,diff1,diff2,positions): #maybe diff should be relative to the variation of each feature
263
+ term1=[values[i]+diff1 if i == positions[0] else values[i] for i in range(len(values))]
264
+ term1=[term1[i]+diff2 if i == positions[1] else term1[i] for i in range(len(term1))]
265
+ term2=[values[i]-diff1 if i == positions[0] else values[i] for i in range(len(values))]
266
+ term2=[term2[i]+diff2 if i == positions[1] else term2[i] for i in range(len(term2))]
267
+ term3=[values[i]+diff1 if i == positions[0] else values[i] for i in range(len(values))]
268
+ term3=[term3[i]-diff2 if i == positions[1] else term3[i] for i in range(len(term3))]
269
+ term4=[values[i]-diff1 if i == positions[0] else values[i] for i in range(len(values))]
270
+ term4=[term4[i]-diff2 if i == positions[1] else term4[i] for i in range(len(term4))]
271
+ return ((evaluateGPModel(model,term1)-evaluateGPModel(model,term2))/((2*diff1))
272
+ -(evaluateGPModel(model,term3)-evaluateGPModel(model,term4))/((2*diff1)))/(2*diff2)
273
+
274
+ def ApproxHessRank(model,vars,values,diff1=0.001,diff2=0.001):
275
+ hess=[[Approx2Deriv(model,values,diff1,diff2,[i,j]) for i in range(vars)] for j in range(vars)]
276
+ hessN = np.array(hess).astype(float)
277
+ rankN=np.linalg.matrix_rank(hessN,tol=0.0001*0.0001*10)
278
+ return rankN
279
+
280
+ #def HessRank(model,vars,values):
281
+ # try:
282
+ # with time_limit(.01):
283
+ # hess=ComputeSymbolicHess(model,vars)
284
+ # hess = EvaluateHess(hess,vars,values)
285
+ # #print(hess)
286
+ # return hess
287
+ # except TimeoutException as e:
288
+ # hess=ApproxHessRank(model,vars,values)
289
+ #print(hess)
290
+ # return hess
291
+
292
+ def HessRank(model,vars,values):
293
+ hess=ApproxHessRank(model,vars,values)
294
+ return hess
295
+
296
+
297
+
298
+
299
+
300
+ # Counts basis terms in a model
301
+ def count_basis_terms(equation, expand=False):
302
+ try:
303
+ with time_limit(2):
304
+
305
+
306
+ if expand:
307
+ # Simplify the equation to standardize the expression
308
+ simplified_eq = simplify(equation)
309
+ # Expand the expression to identify additive terms clearly
310
+ expanded_eq = expand(simplified_eq)
311
+
312
+ # Separate the terms of the expression
313
+ terms = expanded_eq.as_ordered_terms()
314
+ else:
315
+ terms = equation.as_ordered_terms()
316
+ #print(terms)
317
+
318
+ except TimeoutException as e:
319
+ return 1000
320
+ return len(terms)
321
+
322
+ # Determines the number of basis functions in a model by counting +s and -s
323
+ def basisFunctionComplexity(model,vars, values,*args):
324
+ try: # values should be max, min, and median with respect to response variable
325
+ return HessRank(model,vars,values)#count_basis_terms(printGPModel(model))
326
+ except:
327
+ return 1000
328
+
329
+ # Creates a lambda function to be used as a complexity metric when given a target dimensionality and deviation
330
+ def basisFunctionComplexityDiff(target, deviation, vars, low, mid, high):
331
+ return lambda model,*args: max(np.mean([abs(basisFunctionComplexity(model,vars,low)-target),abs(basisFunctionComplexity(model,vars,mid)-target) ,abs(basisFunctionComplexity(model,vars,high)-target)] ),(deviation))-deviation
332
+
333
+
223
334
  def setModelQuality(model,inputData,response,modelEvaluationMetrics=[fitness,stackGPModelComplexity]):
224
335
  model[2]=[i(model,inputData,response) for i in modelEvaluationMetrics]
336
+
225
337
 
226
338
  setModelQuality.__doc__ = "setModelQuality(model, inputdata, response, metrics=[r2,size]) is an inplace operator that sets a models quality"
227
339
  def stackPass(model,pt):
@@ -493,7 +605,7 @@ def alignGPModel(model, data, response): #Aligns a model
493
605
  if np.isnan(np.array(prediction)).any() or np.isnan(np.array(response)).any() or not np.isfinite(np.array(prediction,dtype=np.float32)).all():
494
606
  return model
495
607
  try:
496
- align=np.round(np.polyfit(prediction,response,1,rcond=1e-16),decimals=14)
608
+ align=np.polyfit(prediction,response,1,rcond=1e-16)#np.round(np.polyfit(prediction,response,1,rcond=1e-16),decimals=14)
497
609
  except np.linalg.LinAlgError:
498
610
  #print("Alignment failed for: ", model, " with prediction: ", prediction, "and reference data: ", response)
499
611
  return model
@@ -503,8 +615,10 @@ def alignGPModel(model, data, response): #Aligns a model
503
615
  setModelQuality(newModel,data,response)
504
616
  return newModel
505
617
  alignGPModel.__doc__ = "alignGPModel(model, input, response) aligns a model such that response-a*f(x)+b are minimized over a and b"
506
- def evolve(inputData, responseData, generations=100, ops=defaultOps(), const=defaultConst(), variableNames=[], mutationRate=79, crossoverRate=11, spawnRate=10, extinction=False,extinctionRate=10,elitismRate=50,popSize=300,maxComplexity=100,align=True,initialPop=[],timeLimit=300,capTime=False,tourneySize=5,tracking=False,modelEvaluationMetrics=[fitness,stackGPModelComplexity],dataSubsample=False,samplingMethod=randomSubsample):
618
+ def evolve(inputData, responseData, generations=100, ops=defaultOps(), const=defaultConst(), variableNames=[], mutationRate=79, crossoverRate=11, spawnRate=10, extinction=False,extinctionRate=10,elitismRate=50,popSize=300,maxComplexity=100,align=True,initialPop=[],timeLimit=300,capTime=False,tourneySize=5,tracking=False,modelEvaluationMetrics=[fitness,stackGPModelComplexity],dataSubsample=False,samplingMethod=randomSubsample,alternateObjectives=[],alternateObjFrequency=10,allowEarlyTermination=False,earlyTerminationThreshold=0):
507
619
 
620
+ metrics=modelEvaluationMetrics
621
+
508
622
  fullInput,fullResponse=copy.deepcopy(inputData),copy.deepcopy(responseData)
509
623
  inData=copy.deepcopy(fullInput)
510
624
  resData=copy.deepcopy(fullResponse)
@@ -516,11 +630,18 @@ def evolve(inputData, responseData, generations=100, ops=defaultOps(), const=def
516
630
  for i in range(generations):
517
631
  if capTime and time.perf_counter()-startTime>timeLimit:
518
632
  break
633
+ if len(alternateObjectives)>0 and (i+1)%alternateObjFrequency==0:
634
+ metrics=modelEvaluationMetrics[:1]+alternateObjectives
635
+ else:
636
+ metrics=modelEvaluationMetrics
519
637
  if dataSubsample:
520
638
  inData,resData=samplingMethod(fullInput,fullResponse)
521
639
  for mods in models:
522
- setModelQuality(mods,inData,resData,modelEvaluationMetrics=modelEvaluationMetrics)
640
+ setModelQuality(mods,inData,resData,modelEvaluationMetrics=metrics)
523
641
  models=removeIndeterminateModels(models)
642
+ if allowEarlyTermination and min([mods[2][0] for mods in models])<=earlyTerminationThreshold:
643
+ print("Early termination at generation ", i)
644
+ break
524
645
  if tracking:
525
646
  bestFits.append(min([mods[2][0] for mods in paretoTournament(models)]))
526
647
 
@@ -529,7 +650,7 @@ def evolve(inputData, responseData, generations=100, ops=defaultOps(), const=def
529
650
  if extinction and i%extinctionRate:
530
651
  models=initializeGPModels(variableCount,ops,const,popSize)
531
652
  for mods in models:
532
- setModelQuality(mods,inData,resData,modelEvaluationMetrics=modelEvaluationMetrics)
653
+ setModelQuality(mods,inData,resData,modelEvaluationMetrics=metrics)
533
654
 
534
655
  models=tournamentModelSelection(models,popSize,tourneySize)
535
656
 
@@ -560,7 +681,7 @@ def evolve(inputData, responseData, generations=100, ops=defaultOps(), const=def
560
681
 
561
682
 
562
683
  for mods in models:
563
- setModelQuality(mods,fullInput,fullResponse,modelEvaluationMetrics=modelEvaluationMetrics)
684
+ setModelQuality(mods,fullInput,fullResponse,modelEvaluationMetrics=modelEvaluationMetrics+alternateObjectives)
564
685
  models=[trimModel(mod) for mod in models]
565
686
  models=deleteDuplicateModels(models)
566
687
  models=removeIndeterminateModels(models)
@@ -995,4 +1116,4 @@ def runEpochs(x,y,epochs=5,**kwargs):
995
1116
  for i in range(epochs):
996
1117
  models+=evolve(x,y,**kwargs)
997
1118
 
998
- return sortModels(models)
1119
+ return sortModels(models)
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.4
2
2
  Name: StackGP
3
- Version: 0.0.4
3
+ Version: 0.0.6
4
4
  Summary: A stack-based genetic programming system
5
5
  Author-email: Nathan Haut <hautnath@msu.edu>
6
6
  Project-URL: Homepage, https://github.com/hoolagans/StackGP
@@ -17,6 +17,7 @@ Requires-Dist: matplotlib
17
17
  Requires-Dist: scipy
18
18
  Requires-Dist: scikit-learn
19
19
  Requires-Dist: sympy
20
+ Dynamic: license-file
20
21
 
21
22
  # StackGP
22
23
  A stack-based genetic programming system in Python
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "StackGP"
3
- version = "0.0.4"
3
+ version = "0.0.6"
4
4
  authors = [
5
5
  { name="Nathan Haut", email="hautnath@msu.edu" },
6
6
  ]
File without changes
File without changes
File without changes