pyeggp 1.0.2__cp311-cp311-win_amd64.whl → 1.0.4__cp311-cp311-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyeggp/__init__.py +228 -5
- pyeggp/_binding.cp311-win_amd64.pyd +0 -0
- {pyeggp-1.0.2.dist-info → pyeggp-1.0.4.dist-info}/DELVEWHEEL +1 -1
- {pyeggp-1.0.2.dist-info → pyeggp-1.0.4.dist-info}/METADATA +170 -5
- {pyeggp-1.0.2.dist-info → pyeggp-1.0.4.dist-info}/RECORD +10 -10
- {pyeggp-1.0.2.dist-info → pyeggp-1.0.4.dist-info}/WHEEL +1 -1
- pyeggp.libs/{libnlopt-d04b99da04931c2c76fbfa6fc8945575.dll → libnlopt-58b7f3854da0d511f4c514d3d8ae1baf.dll} +0 -0
- {pyeggp-1.0.2.dist-info → pyeggp-1.0.4.dist-info}/entry_points.txt +0 -0
- {pyeggp-1.0.2.dist-info → pyeggp-1.0.4.dist-info}/licenses/LICENSE +0 -0
- {pyeggp-1.0.2.dist-info → pyeggp-1.0.4.dist-info}/top_level.txt +0 -0
pyeggp/__init__.py
CHANGED
@@ -13,6 +13,7 @@ import atexit
|
|
13
13
|
from contextlib import contextmanager
|
14
14
|
from threading import Lock
|
15
15
|
from typing import Iterator, List
|
16
|
+
import string
|
16
17
|
from io import StringIO
|
17
18
|
import tempfile
|
18
19
|
import csv
|
@@ -31,7 +32,7 @@ from ._binding import (
|
|
31
32
|
unsafe_hs_pyeggp_exit,
|
32
33
|
)
|
33
34
|
|
34
|
-
VERSION: str = "1.
|
35
|
+
VERSION: str = "1.0.4"
|
35
36
|
|
36
37
|
|
37
38
|
_hs_rts_init: bool = False
|
@@ -69,8 +70,140 @@ def pyeggp_run(dataset: str, gen: int, nPop: int, maxSize: int, nTournament: int
|
|
69
70
|
with hs_rts_init():
|
70
71
|
return unsafe_hs_pyeggp_run(dataset, gen, nPop, maxSize, nTournament, pc, pm, nonterminals, loss, optIter, optRepeat, nParams, split, simplify, dumpTo, loadFrom)
|
71
72
|
|
73
|
+
def make_function(expression, loss="MSE"):
    """Build a callable that evaluates a NumPy expression string.

    Parameters
    ----------
    expression : str
        Source of a single Python expression. It may refer to `x` (the data),
        `t` (the parameter vector) and any module-level name such as `np`.
    loss : str, default="MSE"
        "Bernoulli" passes the raw output through the logistic function,
        "Poisson" passes it through `exp`; any other value returns the raw
        output unchanged.

    Returns
    -------
    func : callable
        `func(x, t)` returning the (possibly transformed) value of
        `expression`.

    Raises
    ------
    SyntaxError
        If `expression` is not a valid Python expression. This is raised when
        the function is built instead of on its first call.
    """
    # SECURITY: `expression` is executed as Python code. Only pass strings
    # coming from a trusted source (e.g. the `Numpy` column of the results).
    # `eval` ignores leading blanks but `compile` does not, so strip them here.
    source = expression.lstrip(" \t") if isinstance(expression, str) else expression
    # Compile once so repeated calls do not re-parse the expression.
    code = compile(source, "<pyeggp expression>", "eval")

    def func(x, t):
        # The expression sees the module globals (for `np`) plus `x` and `t`.
        y = eval(code, globals(), {"x": x, "t": t})
        if loss == "Bernoulli":
            return 1/(1 + np.exp(-y))
        elif loss == "Poisson":
            return np.exp(y)
        return y

    return func
|
82
|
+
|
72
83
|
class PyEGGP(BaseEstimator, RegressorMixin):
|
84
|
+
""" Builds a symbolic regression model using eggp.
|
85
|
+
|
86
|
+
Parameters
|
87
|
+
----------
|
88
|
+
gen : int, default=100
|
89
|
+
The number of generations.
|
90
|
+
|
91
|
+
nPop : int, default=100
|
92
|
+
Population size.
|
93
|
+
|
94
|
+
maxSize : int, default=15
|
95
|
+
Maximum allowed size for the expression.
|
96
|
+
This should not be larger than 100 as the e-graph may grow
|
97
|
+
too large.
|
98
|
+
|
99
|
+
nTournament : int, default=3
|
100
|
+
Tournament size. During parent selection it will
|
101
|
+
pick `nTournament` expressions at random and
|
102
|
+
return the best among them.
|
103
|
+
|
104
|
+
pc : float, default=0.9
|
105
|
+
Probability of performing the crossover operator.
|
106
|
+
|
107
|
+
pm : float, default=0.3
|
108
|
+
Probability of performing the mutation operator.
|
109
|
+
|
110
|
+
nonterminals : str, default="add,sub,mul,div"
|
111
|
+
String of a comma separated list of nonterminals.
|
112
|
+
These are the allowed functions to be used during the search.
|
113
|
+
Available functions: add,sub,mul,div,power,powerabs,aq,abs,sin,cos,
|
114
|
+
tan,sinh,cosh,tanh,asin,acos,atan,asinh,acosh,
|
115
|
+
atanh,sqrt,sqrtabs,cbrt,square,log,logabs,exp,
|
116
|
+
recip,cube.
|
117
|
+
Where `aq` is the analytical quotient (x/sqrt(1 + y^2)),
|
118
|
+
`powerabs` is the protected power (x^|y|)
|
119
|
+
`sqrtabs` is the protected sqrt (sqrt(|x|))
|
120
|
+
`logabs` is the protected log (log(|x|))
|
121
|
+
`recip` is the reciprocal (1/x)
|
122
|
+
`cbrt` is the cubic root
|
123
|
+
|
124
|
+
loss : {"MSE", "Gaussian", "Bernoulli", "Poisson"}, default="MSE"
|
125
|
+
Loss function used to evaluate the expressions:
|
126
|
+
- MSE (mean squared error) should be used for regression problems.
|
127
|
+
- Gaussian likelihood should be used for regression problem when you want to
|
128
|
+
fit the error term.
|
129
|
+
- Bernoulli likelihood should be used for classification problem.
|
130
|
+
- Poisson likelihood should be used when the data distribution follows a Poisson.
|
131
|
+
|
132
|
+
optIter : int, default=50
|
133
|
+
Number of iterations for the parameter optimization.
|
134
|
+
|
135
|
+
optRepeat : int, default=2
|
136
|
+
Number of restarts for the parameter optimization.
|
137
|
+
|
138
|
+
nParams : int, default=-1
|
139
|
+
Maximum number of parameters. If set to -1 it will
|
140
|
+
allow the expression to have any number of parameters.
|
141
|
+
If set to a number > 0, it will limit the number of parameters,
|
142
|
+
but allow it to appear multiple times in the expression.
|
143
|
+
E.g., t0 * x0 + exp(t0*x0 + t1)
|
144
|
+
|
145
|
+
split : int, default=1
|
146
|
+
How to split the data to create the validation set.
|
147
|
+
If set to 1, it will use the whole data for fitting the parameter and
|
148
|
+
calculating the fitness function.
|
149
|
+
If set to n>1, it will use 1/n for calculating the fitness function
|
150
|
+
and the remainder for fitting the parameters.
|
151
|
+
|
152
|
+
simplify : bool, default=False
|
153
|
+
Whether to apply a final step of equality saturation to simplify the expressions.
|
154
|
+
|
155
|
+
dumpTo : str, default=""
|
156
|
+
If not empty, it will save the final e-graph into the filename.
|
157
|
+
|
158
|
+
loadFrom : str, default=""
|
159
|
+
If not empty, it will load an e-graph and resume the search.
|
160
|
+
The user must ensure that the loaded e-graph is from the same
|
161
|
+
dataset and loss function.
|
162
|
+
|
163
|
+
Examples
|
164
|
+
--------
|
165
|
+
>>> from pyeggp import PyEGGP
|
166
|
+
>>> import numpy as np
|
167
|
+
>>> X = np.arange(100).reshape(100, 1)
|
168
|
+
>>> y = np.zeros((100, ))
|
169
|
+
>>> estimator = PyEGGP()
|
170
|
+
>>> estimator.fit(X, y)
|
171
|
+
>>>
|
172
|
+
>>> estimator = PyEGGP(loss="Bernoulli")
|
173
|
+
>>> estimator.fit(X, y)
|
174
|
+
"""
|
73
175
|
def __init__(self, gen = 100, nPop = 100, maxSize = 15, nTournament = 3, pc = 0.9, pm = 0.3, nonterminals = "add,sub,mul,div", loss = "MSE", optIter = 50, optRepeat = 2, nParams = -1, split = 1, simplify = False, dumpTo = "", loadFrom = ""):
|
176
|
+
nts = "add,sub,mul,div,power,powerabs,\
|
177
|
+
aq,abs,sin,cos,tan,sinh,cosh,tanh,\
|
178
|
+
asin,acos,atan,asinh,acosh,atanh,sqrt,\
|
179
|
+
sqrtabs,cbrt,square,log,logabs,exp,recip,cube"
|
180
|
+
losses = ["MSE", "Gaussian", "Bernoulli", "Poisson"]
|
181
|
+
if gen < 1:
|
182
|
+
raise ValueError('gen should be greater than 1')
|
183
|
+
if nPop < 1:
|
184
|
+
raise ValueError('nPop should be greater than 1')
|
185
|
+
if maxSize < 1 or maxSize > 100:
|
186
|
+
raise ValueError('maxSize should be a value between 1 and 100')
|
187
|
+
if nTournament < 1 or nTournament > nPop:
|
188
|
+
raise ValueError('nTournament should be a value between 1 and nPop')
|
189
|
+
if pc < 0 or pc > 1:
|
190
|
+
raise ValueError('pc should be between 0 and 1')
|
191
|
+
if pm < 0 or pm > 1:
|
192
|
+
raise ValueError('pm should be between 0 and 1')
|
193
|
+
if any(t not in nts for t in nonterminals):
|
194
|
+
raise ValueError('nonterminals must be a comma separated list of one or more of ', nts)
|
195
|
+
if loss not in losses:
|
196
|
+
raise ValueError('loss must be one of ', losses)
|
197
|
+
if optIter < 0:
|
198
|
+
raise ValueError('optIter must be a positive number')
|
199
|
+
if optRepeat < 0:
|
200
|
+
raise ValueError('optRepeat must be a positive number')
|
201
|
+
if nParams < -1:
|
202
|
+
raise ValueError('nParams must be either -1 or a positive number')
|
203
|
+
if split < 1:
|
204
|
+
raise ValueError('split must be equal or greater than 1')
|
205
|
+
if not isinstance(simplify, bool):
|
206
|
+
raise TypeError('simplify must be a boolean')
|
74
207
|
self.gen = gen
|
75
208
|
self.nPop = nPop
|
76
209
|
self.maxSize = maxSize
|
@@ -89,6 +222,15 @@ class PyEGGP(BaseEstimator, RegressorMixin):
|
|
89
222
|
self.is_fitted_ = False
|
90
223
|
|
91
224
|
def fit(self, X, y):
|
225
|
+
''' Fits the regression model.
|
226
|
+
|
227
|
+
Parameters
|
228
|
+
----------
|
229
|
+
X : np.array
|
230
|
+
An m x n np.array describing m observations of n features.
|
231
|
+
y : np.array
|
232
|
+
An np.array of size m with the measured target values.
|
233
|
+
'''
|
92
234
|
if X.ndim == 1:
|
93
235
|
X = X.reshape(-1,1)
|
94
236
|
y = y.reshape(-1, 1)
|
@@ -108,6 +250,15 @@ class PyEGGP(BaseEstimator, RegressorMixin):
|
|
108
250
|
return self
|
109
251
|
|
110
252
|
def fit_mvsr(self, Xs, ys):
|
253
|
+
''' Fits a multi-view regression model.
|
254
|
+
|
255
|
+
Parameters
|
256
|
+
----------
|
257
|
+
Xs : list(np.array)
|
258
|
+
A list with k elements of m_k x n np.arrays describing m_k observations of n features.
|
259
|
+
ys : list(np.array)
|
260
|
+
A list of k elements of np.arrays of size m_k with the measured target values.
|
261
|
+
'''
|
111
262
|
if Xs[0].ndim == 1:
|
112
263
|
Xs = [X.reshape(-1,1) for X in Xs]
|
113
264
|
ys = [y.reshape(-1, 1) for y in ys]
|
@@ -129,10 +280,43 @@ class PyEGGP(BaseEstimator, RegressorMixin):
|
|
129
280
|
return self
|
130
281
|
|
131
282
|
def predict(self, X):
    ''' Generates the prediction using the best model (selected by accuracy).

    Runs `check_is_fitted` on the estimator and then delegates to
    `evaluate_best_model`.

    Parameters
    ----------
    X : np.array
        An m x n np.array describing m observations of n features.
        This array must have the same number of features as the training data.

    Returns
    -------
    y : np.array
        A vector of predictions.

    Notes
    -----
    A table with the fitted models and additional information
    is kept as a Pandas dataframe in self.results.
    '''
    check_is_fitted(self)
    return self.evaluate_best_model(X)
|
134
301
|
|
135
302
|
def predict_mvsr(self, X, view):
    ''' Generates the prediction using the best model (selected by accuracy)
    of the specified `view`.

    Runs `check_is_fitted` on the estimator and then delegates to
    `evaluate_best_model_view`.

    Parameters
    ----------
    X : np.array
        An m x n np.array describing m observations of n features.
        This array must have the same number of features as the training data.

    view : int
        The index of the view (starting at 0).

    Returns
    -------
    y : np.array
        A vector of predictions.
    '''
    check_is_fitted(self)
    return self.evaluate_best_model_view(X, view)
|
138
322
|
|
@@ -140,26 +324,65 @@ class PyEGGP(BaseEstimator, RegressorMixin):
|
|
140
324
|
if x.ndim == 1:
|
141
325
|
x = x.reshape(-1,1)
|
142
326
|
t = np.array(list(map(float, self.results.iloc[-1].theta.split(";"))))
|
143
|
-
|
327
|
+
y = eval(self.results.iloc[-1].Numpy)
|
328
|
+
if self.loss == "Bernoulli":
|
329
|
+
return 1/(1 + np.exp(-y))
|
330
|
+
elif self.loss == "Poisson":
|
331
|
+
return np.exp(y)
|
332
|
+
return y
|
144
333
|
def evaluate_best_model_view(self, x, view):
    ''' Evaluates the best model on `x` using the parameters of one view.

    The best model is the one whose `id` appears in the last row of
    `self.results`; among the rows sharing that id, the one at position
    `view` supplies the expression and the parameter values.

    Parameters
    ----------
    x : np.array
        Input data; a 1-D array is reshaped into a single column.
    view : int
        Position of the view among the rows of the best model.

    Returns
    -------
    y : np.array
        Value of the expression, passed through the logistic function when
        `self.loss` is "Bernoulli" or through `exp` when it is "Poisson".
    '''
    if x.ndim == 1:
        x = x.reshape(-1,1)
    ix = self.results.iloc[-1].id
    best = self.results[self.results.id==ix].iloc[view]
    theta = best.theta
    # A model without parameters has an empty (or missing, i.e. NaN) theta.
    if isinstance(theta, str) and theta.strip():
        t = np.array(list(map(float, theta.split(";"))))
    else:
        t = np.array([], dtype=float)
    # `x` and `t` must keep these names: the stored expression refers to them.
    # SECURITY: the expression is executed as Python code.
    y = eval(best.Numpy)
    if self.loss == "Bernoulli":
        return 1/(1 + np.exp(-y))
    elif self.loss == "Poisson":
        return np.exp(y)
    return y
|
151
345
|
|
152
346
|
def evaluate_model_view(self, x, ix, view):
    ''' Evaluates the model with id `ix` on `x` using the parameters of one view.

    Among the rows of `self.results` whose `id` equals `ix`, the one at
    position `view` supplies the expression and the parameter values.

    Parameters
    ----------
    x : np.array
        Input data; a 1-D array is reshaped into a single column.
    ix : int
        Value of the `id` column identifying the model.
    view : int
        Position of the view among the rows of that model.

    Returns
    -------
    y : np.array
        Value of the expression, passed through the logistic function when
        `self.loss` is "Bernoulli" or through `exp` when it is "Poisson".
    '''
    if x.ndim == 1:
        x = x.reshape(-1,1)
    best = self.results[self.results.id==ix].iloc[view]
    theta = best.theta
    # A model without parameters has an empty (or missing, i.e. NaN) theta.
    if isinstance(theta, str) and theta.strip():
        t = np.array(list(map(float, theta.split(";"))))
    else:
        t = np.array([], dtype=float)
    # `x` and `t` must keep these names: the stored expression refers to them.
    # SECURITY: the expression is executed as Python code.
    y = eval(best.Numpy)
    if self.loss == "Bernoulli":
        return 1/(1 + np.exp(-y))
    elif self.loss == "Poisson":
        return np.exp(y)
    return y
|
158
357
|
def evaluate_model(self, ix, x):
    ''' Evaluates the model stored at row `ix` of `self.results` on `x`.

    Both the expression and its parameter values are taken from the same
    row, selected by position (`iloc`). Note that this differs from
    `evaluate_model_view`, which selects rows by the value of the `id` column.

    Parameters
    ----------
    ix : int
        Positional index of the row in `self.results`.
    x : np.array
        Input data; a 1-D array is reshaped into a single column.

    Returns
    -------
    y : np.array
        Value of the expression, passed through the logistic function when
        `self.loss` is "Bernoulli" or through `exp` when it is "Poisson".
    '''
    if x.ndim == 1:
        x = x.reshape(-1,1)
    row = self.results.iloc[ix]
    theta = row.theta
    # A model without parameters has an empty (or missing, i.e. NaN) theta.
    if isinstance(theta, str) and theta.strip():
        t = np.array(list(map(float, theta.split(";"))))
    else:
        t = np.array([], dtype=float)
    # `x` and `t` must keep these names: the stored expression refers to them.
    # SECURITY: the expression is executed as Python code.
    y = eval(row.Numpy)
    if self.loss == "Bernoulli":
        return 1/(1 + np.exp(-y))
    elif self.loss == "Poisson":
        return np.exp(y)
    return y
|
163
367
|
def score(self, X, y):
    ''' Returns `r2_score` of the best model's output on `X` against `y`
    (single-view only).
    '''
    return r2_score(y, self.evaluate_best_model(X))
|
372
|
+
def get_model(self, idx):
    ''' Get a `model` function and its visual representation.

    Parameters
    ----------
    idx : int
        Value of the `id` column identifying the model; the first matching
        row of `self.results` is used.

    Returns
    -------
    model : callable
        Function built by `make_function` from the row's `Numpy` expression
        and `self.loss`.
    visual_expression : str
        The same expression with every `t[i]` replaced by an uppercase
        name (A..Z, then AA, AB, ...) and every `x[:, i]` replaced by `Xi`.
    '''
    import re  # local import: only this method needs it

    row = self.results[self.results['id']==idx].iloc[0]
    expression = row['Numpy']
    model = make_function(expression, self.loss)

    def param_name(match):
        # Spreadsheet-style naming so any number of parameters is supported:
        # 0 -> A, 25 -> Z, 26 -> AA, ...
        n = int(match.group(1)) + 1
        name = ''
        while n:
            n, rem = divmod(n - 1, 26)
            name = string.ascii_uppercase[rem] + name
        return name

    # Rename whatever indices occur in the expression itself, so there is no
    # cap on the number of parameters or features and no dependency on theta.
    visual_expression = re.sub(r'\bt\[(\d+)\]', param_name, expression)
    visual_expression = re.sub(r'\bx\[:,\s*(\d+)\]', r'X\1', visual_expression)

    return model, visual_expression
|
Binary file
|
@@ -1,2 +1,2 @@
|
|
1
1
|
Version: 1.10.0
|
2
|
-
Arguments: ['C:\\hostedtoolcache\\windows\\Python\\3.12.9\\x64\\Scripts\\delvewheel', 'repair', '-w', 'C:\\Users\\runneradmin\\AppData\\Local\\Temp\\cibw-run-
|
2
|
+
Arguments: ['C:\\hostedtoolcache\\windows\\Python\\3.12.9\\x64\\Scripts\\delvewheel', 'repair', '-w', 'C:\\Users\\runneradmin\\AppData\\Local\\Temp\\cibw-run-ig5ht48y\\cp311-win_amd64\\repaired_wheel', 'C:\\Users\\runneradmin\\AppData\\Local\\Temp\\cibw-run-ig5ht48y\\cp311-win_amd64\\built_wheel\\pyeggp-1.0.4-cp311-cp311-win_amd64.whl', '--add-path', 'C:\\nlopt\\bin']
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: pyeggp
|
3
|
-
Version: 1.0.
|
3
|
+
Version: 1.0.4
|
4
4
|
Summary: Python Wheels for eggp algorithm.
|
5
5
|
Author-email: Fabricio Olivetti <folivetti@users.noreply.github.com>
|
6
6
|
License: GNU GENERAL PUBLIC LICENSE
|
@@ -695,16 +695,181 @@ Provides-Extra: mypy
|
|
695
695
|
Requires-Dist: types_setuptools>=45; extra == "mypy"
|
696
696
|
Dynamic: license-file
|
697
697
|
|
698
|
-
#
|
698
|
+
# PyEGGP
|
699
699
|
|
700
|
-
Python
|
700
|
+
A Python package for symbolic regression using e-graph-based genetic programming. PyEGGP provides a scikit-learn compatible API for evolutionary symbolic regression tasks.
|
701
701
|
|
702
|
-
|
702
|
+
More info [here](https://github.com/folivetti/srtree/tree/main/apps/eggp)
|
703
703
|
|
704
|
-
|
704
|
+
## Installation
|
705
705
|
|
706
706
|
```bash
|
707
707
|
pip install pyeggp
|
708
708
|
```
|
709
709
|
|
710
|
+
## Features
|
711
|
+
|
712
|
+
- Scikit-learn compatible API with `fit()` and `predict()` methods
|
713
|
+
- Genetic programming approach with e-graph representation
|
714
|
+
- Support for **multi-view symbolic regression** [see here](https://arxiv.org/abs/2402.04298)
|
715
|
+
- Customizable evolutionary parameters (population size, tournament selection, etc.)
|
716
|
+
- Flexible function set selection
|
717
|
+
- Various loss functions for different problem types
|
718
|
+
- Parameter optimization with multiple restarts
|
719
|
+
- Optional expression simplification through equality saturation
|
720
|
+
- Ability to save and load e-graphs
|
721
|
+
|
722
|
+
## Usage
|
723
|
+
|
724
|
+
### Basic Example
|
725
|
+
|
726
|
+
```python
|
727
|
+
from pyeggp import PyEGGP
|
728
|
+
import numpy as np
|
729
|
+
|
730
|
+
# Create sample data
|
731
|
+
X = np.linspace(-10, 10, 100).reshape(-1, 1)
|
732
|
+
y = 2 * X.ravel() + 3 * np.sin(X.ravel()) + np.random.normal(0, 1, 100)
|
733
|
+
|
734
|
+
# Create and fit the model
|
735
|
+
model = PyEGGP(gen=100, nonterminals="add,sub,mul,div,sin,cos")
|
736
|
+
model.fit(X, y)
|
737
|
+
|
738
|
+
# Make predictions
|
739
|
+
y_pred = model.predict(X)
|
740
|
+
|
741
|
+
# Examine the results
|
742
|
+
print(model.results)
|
743
|
+
```
|
744
|
+
|
745
|
+
### Multi-View Symbolic Regression
|
746
|
+
|
747
|
+
```python
|
748
|
+
from pyeggp import PyEGGP
|
749
|
+
import numpy as np
|
750
|
+
|
751
|
+
# Create multiple views of data
|
752
|
+
X1 = np.linspace(-5, 5, 50).reshape(-1, 1)
|
753
|
+
y1 = np.sin(X1.ravel()) + np.random.normal(0, 0.1, 50)
|
754
|
+
|
755
|
+
X2 = np.linspace(0, 10, 100).reshape(-1, 1)
|
756
|
+
y2 = np.sin(X2.ravel()) + np.random.normal(0, 0.2, 100)
|
757
|
+
|
758
|
+
# Create and fit multi-view model
|
759
|
+
model = PyEGGP(gen=150, nPop=200)
|
760
|
+
model.fit_mvsr([X1, X2], [y1, y2])
|
761
|
+
|
762
|
+
# Make predictions for each view
|
763
|
+
y_pred1 = model.predict_mvsr(X1, view=0)
|
764
|
+
y_pred2 = model.predict_mvsr(X2, view=1)
|
765
|
+
```
|
766
|
+
|
767
|
+
### Integration with scikit-learn
|
768
|
+
|
769
|
+
```python
|
770
|
+
from sklearn.model_selection import train_test_split
|
771
|
+
from sklearn.metrics import mean_squared_error
|
772
|
+
from pyeggp import PyEGGP
|
773
|
+
|
774
|
+
# Split data (X, y as created in the Basic Example above)
|
775
|
+
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
|
776
|
+
|
777
|
+
# Create and fit model
|
778
|
+
model = PyEGGP(gen=150, nPop=150, optIter=100)
|
779
|
+
model.fit(X_train, y_train)
|
780
|
+
|
781
|
+
# Evaluate on test set
|
782
|
+
y_pred = model.predict(X_test)
|
783
|
+
mse = mean_squared_error(y_test, y_pred)
|
784
|
+
print(f"Test MSE: {mse}")
|
785
|
+
```
|
786
|
+
|
787
|
+
## Parameters
|
788
|
+
|
789
|
+
| Parameter | Type | Default | Description |
|
790
|
+
|-----------|------|---------|-------------|
|
791
|
+
| `gen` | int | 100 | Number of generations to run |
|
792
|
+
| `nPop` | int | 100 | Population size |
|
793
|
+
| `maxSize` | int | 15 | Maximum allowed size for expressions (max 100) |
|
794
|
+
| `nTournament` | int | 3 | Tournament size for parent selection |
|
795
|
+
| `pc` | float | 0.9 | Probability of performing crossover |
|
796
|
+
| `pm` | float | 0.3 | Probability of performing mutation |
|
797
|
+
| `nonterminals` | str | "add,sub,mul,div" | Comma-separated list of allowed functions |
|
798
|
+
| `loss` | str | "MSE" | Loss function: "MSE", "Gaussian", "Bernoulli", or "Poisson" |
|
799
|
+
| `optIter` | int | 50 | Number of iterations for parameter optimization |
|
800
|
+
| `optRepeat` | int | 2 | Number of restarts for parameter optimization |
|
801
|
+
| `nParams` | int | -1 | Maximum number of parameters (-1 for unlimited) |
|
802
|
+
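| `split` | int | 1 | Validation split: 1 uses all the data for both parameter fitting and fitness; n > 1 uses 1/n of the data for fitness and the remainder for parameter fitting |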
|
803
|
+
| `simplify` | bool | False | Whether to apply equality saturation to simplify expressions |
|
804
|
+
| `dumpTo` | str | "" | Filename to save the final e-graph |
|
805
|
+
| `loadFrom` | str | "" | Filename to load an e-graph to resume search |
|
806
|
+
|
807
|
+
## Available Functions
|
808
|
+
|
809
|
+
The following functions can be used in the `nonterminals` parameter:
|
810
|
+
|
811
|
+
- Basic operations: `add`, `sub`, `mul`, `div`
|
812
|
+
- Powers: `power`, `powerabs`, `square`, `cube`
|
813
|
+
- Roots: `sqrt`, `sqrtabs`, `cbrt`
|
814
|
+
- Trigonometric: `sin`, `cos`, `tan`, `asin`, `acos`, `atan`
|
815
|
+
- Hyperbolic: `sinh`, `cosh`, `tanh`, `asinh`, `acosh`, `atanh`
|
816
|
+
- Others: `abs`, `log`, `logabs`, `exp`, `recip`, `aq` (analytical quotient)
|
817
|
+
|
818
|
+
## Methods
|
819
|
+
|
820
|
+
### Core Methods
|
821
|
+
- `fit(X, y)`: Fits the symbolic regression model
|
822
|
+
- `predict(X)`: Generates predictions using the best model
|
823
|
+
- `score(X, y)`: Computes R² score of the best model
|
824
|
+
|
825
|
+
### Multi-View Methods
|
826
|
+
- `fit_mvsr(Xs, ys)`: Fits a multi-view regression model
|
827
|
+
- `predict_mvsr(X, view)`: Generates predictions for a specific view
|
828
|
+
- `evaluate_best_model_view(X, view)`: Evaluates the best model on a specific view
|
829
|
+
- `evaluate_model_view(X, ix, view)`: Evaluates a specific model on a specific view
|
830
|
+
|
831
|
+
### Utility Methods
|
832
|
+
- `evaluate_best_model(X)`: Evaluates the best model on the given data
|
833
|
+
- `evaluate_model(ix, X)`: Evaluates the model with index `ix` on the given data
|
834
|
+
- `get_model(idx)`: Returns a model function and its visual representation
|
835
|
+
|
836
|
+
## Results
|
837
|
+
|
838
|
+
After fitting, the `results` attribute contains a pandas DataFrame with details about the discovered models, including:
|
839
|
+
- Mathematical expressions
|
840
|
+
- Model complexity
|
841
|
+
- Parameter values
|
842
|
+
- Error metrics
|
843
|
+
- NumPy-compatible expressions
|
844
|
+
|
845
|
+
## License
|
846
|
+
|
847
|
+
Distributed under the GNU General Public License; see the `LICENSE` file shipped with the package.
|
848
|
+
|
849
|
+
## Citation
|
850
|
+
|
851
|
+
If you use PyEGGP in your research, please cite:
|
852
|
+
|
853
|
+
```
|
854
|
+
@inproceedings{eggp,
|
855
|
+
author = {de Franca, Fabricio Olivetti and Kronberger, Gabriel},
|
856
|
+
title = {Improving Genetic Programming for Symbolic Regression with Equality Graphs},
|
857
|
+
year = {2025},
|
858
|
+
isbn = {9798400714658},
|
859
|
+
publisher = {Association for Computing Machinery},
|
860
|
+
address = {New York, NY, USA},
|
861
|
+
url = {https://doi.org/10.1145/3712256.3726383},
|
862
|
+
doi = {10.1145/3712256.3726383},
|
863
|
+
booktitle = {Proceedings of the Genetic and Evolutionary Computation Conference},
|
864
|
+
pages = {},
|
865
|
+
numpages = {9},
|
866
|
+
keywords = {Symbolic regression, Genetic programming, Equality saturation, Equality graphs},
|
867
|
+
location = {Malaga, Spain},
|
868
|
+
series = {GECCO '25},
|
869
|
+
archivePrefix = {arXiv},
|
870
|
+
eprint = {2501.17848},
|
871
|
+
primaryClass = {cs.LG},
|
872
|
+
}
|
873
|
+
```
|
874
|
+
|
710
875
|
The bindings were created following the amazing example written by [wenkokke](https://github.com/wenkokke/example-haskell-wheel)
|
@@ -1,19 +1,19 @@
|
|
1
1
|
pyeggp/binding.i,sha256=UvqL1SWs47LNCpgN3F6l3oufPvwaqUzdHpk0cEZQa5M,1972
|
2
2
|
pyeggp/binding.py,sha256=EhVDh4fL2Zs7erIc8YmMq8Z3VW256E2dkKdPpaprMq8,2701
|
3
3
|
pyeggp/typing.py,sha256=aVUeV85Ig86cccHbyV1KDWjp_ioTl6z3omPA2ZPk0-4,531
|
4
|
-
pyeggp/_binding.cp311-win_amd64.pyd,sha256=
|
4
|
+
pyeggp/_binding.cp311-win_amd64.pyd,sha256=u0HUUKmq9yDRzHpfLcZlkmMtpOSaZi2eY07z1DLmL38,40249856
|
5
5
|
pyeggp/_binding.py,sha256=6f4Z0L1t0gM_KYnA3hscHGIe1xVqnLt7bxph-_HpC2o,598
|
6
6
|
pyeggp/_binding.pyi,sha256=tLpel6vl4zZ_mftC6zVaWVfV2zhVxDLIZpFQ3cisgH4,467
|
7
|
-
pyeggp/__init__.py,sha256=
|
7
|
+
pyeggp/__init__.py,sha256=9GnmBOdWA0nwF09fqnA0tUE5sh_EB5TMXxaEvdXHrOk,14995
|
8
8
|
pyeggp/__main__.py,sha256=b5xBNv5E7XRE2a2cCHvSYQvDTJ7i6SR2JuAvstvEaFE,166
|
9
|
-
pyeggp-1.0.
|
10
|
-
pyeggp-1.0.
|
11
|
-
pyeggp-1.0.
|
12
|
-
pyeggp-1.0.
|
13
|
-
pyeggp-1.0.
|
14
|
-
pyeggp-1.0.
|
15
|
-
pyeggp-1.0.
|
9
|
+
pyeggp-1.0.4.dist-info/DELVEWHEEL,sha256=PpJhDf7Xk7LH2vXtn7pNc7gpZCB1xv5u_QaKouI0lDU,387
|
10
|
+
pyeggp-1.0.4.dist-info/entry_points.txt,sha256=NJouaQ2UoCLBu7Toqer0FBvNSDYMawR_Wx9STMdopyY,48
|
11
|
+
pyeggp-1.0.4.dist-info/METADATA,sha256=qKFsu0PQJFYSvHTxWONiYsmZy5rpe7brGh5LG7OKHAY,48133
|
12
|
+
pyeggp-1.0.4.dist-info/RECORD,,
|
13
|
+
pyeggp-1.0.4.dist-info/top_level.txt,sha256=iLrWfWZHh4NruYDgVyn2ntEB-uJ8fIs95nzNVLIhKE4,7
|
14
|
+
pyeggp-1.0.4.dist-info/WHEEL,sha256=pkI-s5KKCTCXRcuamRCpmUHK9lBRiVf1mC9_VUZSXgc,101
|
15
|
+
pyeggp-1.0.4.dist-info/licenses/LICENSE,sha256=IwGE9guuL-ryRPEKi6wFPI_zOhg7zDZbTYuHbSt_SAk,35823
|
16
16
|
pyeggp.libs/libgcc_s_seh-1-c7e71e753d95ee0b157469f5daa29a03.dll,sha256=OaympRU538qibFmFvn4Gt-RCKJiOQeGMApqEFl7_fKY,101376
|
17
|
-
pyeggp.libs/libnlopt-
|
17
|
+
pyeggp.libs/libnlopt-58b7f3854da0d511f4c514d3d8ae1baf.dll,sha256=hz1uvtchRLmi07vtE0YdBCWx2-Ee2t-Iye0R6am1fvg,749190
|
18
18
|
pyeggp.libs/libstdc++-6-a07611cfa5f65b02dbc1ace58f4dc030.dll,sha256=F4uaDJoJUOIf4DY3GuDR8SWay0W_LEel8SY8zJYod-k,2020352
|
19
19
|
pyeggp.libs/libwinpthread-1-2db4a17751d27a5781b70c35799daa95.dll,sha256=4BuOhf1nwrhh9k1MzH32B1m1iapBAACij6K4SiCJF1s,54784
|
index 041831c..d22e881 100644
|
|
Binary file
|
File without changes
|
File without changes
|
File without changes
|