psyke 0.4.9.dev6__py3-none-any.whl → 1.0.4.dev10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- psyke/__init__.py +231 -85
- psyke/clustering/__init__.py +9 -4
- psyke/clustering/cream/__init__.py +6 -10
- psyke/clustering/exact/__init__.py +17 -11
- psyke/clustering/utils.py +0 -1
- psyke/extraction/__init__.py +25 -0
- psyke/extraction/cart/CartPredictor.py +128 -0
- psyke/extraction/cart/FairTree.py +205 -0
- psyke/extraction/cart/FairTreePredictor.py +56 -0
- psyke/extraction/cart/__init__.py +48 -62
- psyke/extraction/hypercubic/__init__.py +187 -47
- psyke/extraction/hypercubic/cosmik/__init__.py +47 -0
- psyke/extraction/hypercubic/creepy/__init__.py +24 -29
- psyke/extraction/hypercubic/divine/__init__.py +86 -0
- psyke/extraction/hypercubic/ginger/__init__.py +100 -0
- psyke/extraction/hypercubic/gridex/__init__.py +45 -84
- psyke/extraction/hypercubic/gridrex/__init__.py +4 -4
- psyke/extraction/hypercubic/hex/__init__.py +104 -0
- psyke/extraction/hypercubic/hypercube.py +275 -72
- psyke/extraction/hypercubic/iter/__init__.py +45 -46
- psyke/extraction/hypercubic/strategy.py +13 -9
- psyke/extraction/real/__init__.py +24 -29
- psyke/extraction/real/utils.py +2 -2
- psyke/extraction/trepan/__init__.py +24 -19
- psyke/genetic/__init__.py +0 -0
- psyke/genetic/fgin/__init__.py +74 -0
- psyke/genetic/gin/__init__.py +144 -0
- psyke/hypercubepredictor.py +102 -0
- psyke/schema/__init__.py +230 -36
- psyke/tuning/__init__.py +40 -28
- psyke/tuning/crash/__init__.py +33 -64
- psyke/tuning/orchid/__init__.py +21 -23
- psyke/tuning/pedro/__init__.py +70 -56
- psyke/utils/logic.py +8 -8
- psyke/utils/plot.py +79 -3
- {psyke-0.4.9.dev6.dist-info → psyke-1.0.4.dev10.dist-info}/METADATA +42 -22
- psyke-1.0.4.dev10.dist-info/RECORD +46 -0
- {psyke-0.4.9.dev6.dist-info → psyke-1.0.4.dev10.dist-info}/WHEEL +1 -1
- {psyke-0.4.9.dev6.dist-info → psyke-1.0.4.dev10.dist-info/licenses}/LICENSE +2 -1
- psyke/extraction/cart/predictor.py +0 -73
- psyke-0.4.9.dev6.dist-info/RECORD +0 -36
- {psyke-0.4.9.dev6.dist-info → psyke-1.0.4.dev10.dist-info}/top_level.txt +0 -0
|
@@ -8,12 +8,11 @@ import pandas as pd
|
|
|
8
8
|
from numpy import ndarray
|
|
9
9
|
|
|
10
10
|
from psyke.extraction.hypercubic.utils import Dimension, Dimensions, MinUpdate, ZippedDimension, Limit, Expansion
|
|
11
|
-
from psyke.schema import Between
|
|
11
|
+
from psyke.schema import Between, GreaterThan, LessThan
|
|
12
12
|
from psyke.utils import get_default_precision, get_int_precision, Target, get_default_random_seed
|
|
13
13
|
from psyke.utils.logic import create_term, to_rounded_real, linear_function_creator
|
|
14
14
|
from sklearn.linear_model import LinearRegression
|
|
15
15
|
from tuprolog.core import Var, Struct
|
|
16
|
-
from random import Random
|
|
17
16
|
import numpy as np
|
|
18
17
|
|
|
19
18
|
|
|
@@ -30,49 +29,93 @@ class Point:
|
|
|
30
29
|
|
|
31
30
|
EPSILON = get_default_precision()
|
|
32
31
|
|
|
33
|
-
def __init__(self, dimensions: list[str], values: list[float]):
|
|
32
|
+
def __init__(self, dimensions: list[str], values: list[float | str]):
|
|
34
33
|
self._dimensions = {dimension: value for (dimension, value) in zip(dimensions, values)}
|
|
35
34
|
|
|
36
|
-
def __getitem__(self, feature: str) -> float:
|
|
35
|
+
def __getitem__(self, feature: str) -> float | str:
|
|
37
36
|
if feature in self._dimensions.keys():
|
|
38
37
|
return self._dimensions[feature]
|
|
39
38
|
else:
|
|
40
39
|
raise FeatureNotFoundException(feature)
|
|
41
40
|
|
|
42
|
-
def __setitem__(self, key: str, value: float) -> None:
|
|
41
|
+
def __setitem__(self, key: str, value: float | str) -> None:
|
|
43
42
|
self._dimensions[key] = value
|
|
44
43
|
|
|
45
44
|
def __eq__(self, other: Point) -> bool:
|
|
46
45
|
return all([abs(self[dimension] - other[dimension]) < Point.EPSILON for dimension in self._dimensions])
|
|
47
46
|
|
|
47
|
+
def distance(self, other: Point, metric: str='Euclidean') -> float:
|
|
48
|
+
distances = [abs(self[dimension] - other[dimension]) for dimension in self._dimensions]
|
|
49
|
+
if metric == 'Euclidean':
|
|
50
|
+
distance = sum(np.array(distances)**2)**0.5
|
|
51
|
+
elif metric == 'Manhattan':
|
|
52
|
+
distance = sum(distances)
|
|
53
|
+
else:
|
|
54
|
+
raise ValueError("metric should be 'Euclidean' or 'Manhattan'")
|
|
55
|
+
return distance
|
|
56
|
+
|
|
48
57
|
@property
|
|
49
|
-
def dimensions(self) -> dict[str, float]:
|
|
58
|
+
def dimensions(self) -> dict[str, float | str]:
|
|
50
59
|
return self._dimensions
|
|
51
60
|
|
|
61
|
+
def to_dataframe(self) -> pd.DataFrame:
|
|
62
|
+
return pd.DataFrame(data=[self.dimensions.values()], columns=list(self.dimensions.keys()))
|
|
63
|
+
|
|
64
|
+
def copy(self) -> Point:
|
|
65
|
+
return Point(list(self._dimensions.keys()), list(self._dimensions.values()))
|
|
66
|
+
|
|
52
67
|
|
|
53
68
|
class HyperCube:
|
|
54
69
|
"""
|
|
55
|
-
An N-dimensional cube holding
|
|
70
|
+
An N-dimensional cube holding an output numeric value.
|
|
56
71
|
"""
|
|
57
72
|
|
|
58
73
|
EPSILON = get_default_precision() # Precision used when comparing two hypercubes
|
|
59
74
|
INT_PRECISION = get_int_precision()
|
|
60
75
|
|
|
61
76
|
def __init__(self, dimension: dict[str, tuple[float, float]] = None, limits: set[Limit] = None,
|
|
62
|
-
output: float | LinearRegression = 0.0):
|
|
77
|
+
output: float | LinearRegression | str = 0.0):
|
|
63
78
|
self._dimensions = self._fit_dimension(dimension) if dimension is not None else {}
|
|
64
79
|
self._limits = limits if limits is not None else set()
|
|
65
80
|
self._output = output
|
|
66
81
|
self._diversity = 0.0
|
|
82
|
+
self._error = 0.0
|
|
83
|
+
self._barycenter = Point([], [])
|
|
84
|
+
self._default = False
|
|
85
|
+
self._infinite_dimensions = {}
|
|
67
86
|
|
|
68
|
-
def __contains__(self,
|
|
87
|
+
def __contains__(self, obj: dict[str, float] | HyperCube) -> bool:
|
|
69
88
|
"""
|
|
70
|
-
Note that a point
|
|
71
|
-
min_dim <=
|
|
72
|
-
:param
|
|
73
|
-
:return: true if the
|
|
89
|
+
Note that a point is inside a hypercube if ALL its dimensions' values satisfy:
|
|
90
|
+
min_dim <= object dimension < max_dim
|
|
91
|
+
:param obj: an N-dimensional object (point or hypercube)
|
|
92
|
+
:return: true if the object is inside the hypercube, false otherwise
|
|
74
93
|
"""
|
|
75
|
-
|
|
94
|
+
if isinstance(obj, HyperCube):
|
|
95
|
+
for k in obj.dimensions:
|
|
96
|
+
if k not in self._infinite_dimensions:
|
|
97
|
+
if not (self.get_first(k) <= obj.get_first(k) <= obj.get_second(k) < self.get_second(k)):
|
|
98
|
+
return False
|
|
99
|
+
elif len(self._infinite_dimensions[k]) == 2:
|
|
100
|
+
continue
|
|
101
|
+
elif '+' in self._infinite_dimensions[k] and self.get_first(k) > obj.get_first(k):
|
|
102
|
+
return False
|
|
103
|
+
elif '-' in self._infinite_dimensions[k] and obj.get_second(k) >= self.get_second(k):
|
|
104
|
+
return False
|
|
105
|
+
elif isinstance(obj, dict):
|
|
106
|
+
for k, v in obj.items():
|
|
107
|
+
if k not in self._infinite_dimensions:
|
|
108
|
+
if not (self.get_first(k) <= v < self.get_second(k)):
|
|
109
|
+
return False
|
|
110
|
+
elif len(self._infinite_dimensions[k]) == 2:
|
|
111
|
+
continue
|
|
112
|
+
elif '+' in self._infinite_dimensions[k] and self.get_first(k) > v:
|
|
113
|
+
return False
|
|
114
|
+
elif '-' in self._infinite_dimensions[k] and v >= self.get_second(k):
|
|
115
|
+
return False
|
|
116
|
+
else:
|
|
117
|
+
raise TypeError("Invalid type for obj parameter")
|
|
118
|
+
return True
|
|
76
119
|
|
|
77
120
|
def __eq__(self, other: HyperCube) -> bool:
|
|
78
121
|
return all([(abs(dimension.this_dimension[0] - dimension.other_dimension[0]) < HyperCube.EPSILON)
|
|
@@ -92,6 +135,21 @@ class HyperCube:
|
|
|
92
135
|
result = [hash(name + str(dimension[0]) + str(dimension[1])) for name, dimension in self.dimensions.items()]
|
|
93
136
|
return sum(result)
|
|
94
137
|
|
|
138
|
+
@property
|
|
139
|
+
def is_default(self) -> bool:
|
|
140
|
+
return self._default
|
|
141
|
+
|
|
142
|
+
def set_default(self):
|
|
143
|
+
self._default = True
|
|
144
|
+
|
|
145
|
+
def set_infinite(self, dimension: str, direction: str):
|
|
146
|
+
if dimension not in self._infinite_dimensions:
|
|
147
|
+
self._infinite_dimensions[dimension] = set()
|
|
148
|
+
self._infinite_dimensions[dimension].add(direction)
|
|
149
|
+
|
|
150
|
+
def copy_infinite_dimensions(self, dimensions: dict[str, str]):
|
|
151
|
+
self._infinite_dimensions = dimensions.copy()
|
|
152
|
+
|
|
95
153
|
@property
|
|
96
154
|
def dimensions(self) -> Dimensions:
|
|
97
155
|
return self._dimensions
|
|
@@ -101,13 +159,28 @@ class HyperCube:
|
|
|
101
159
|
return len(self._limits)
|
|
102
160
|
|
|
103
161
|
@property
|
|
104
|
-
def output(self) -> float | LinearRegression:
|
|
162
|
+
def output(self) -> float | str | LinearRegression:
|
|
105
163
|
return self._output
|
|
106
164
|
|
|
107
165
|
@property
|
|
108
166
|
def diversity(self) -> float:
|
|
109
167
|
return self._diversity
|
|
110
168
|
|
|
169
|
+
@property
|
|
170
|
+
def error(self) -> float:
|
|
171
|
+
return self._error
|
|
172
|
+
|
|
173
|
+
@property
|
|
174
|
+
def barycenter(self) -> Point:
|
|
175
|
+
return self._barycenter
|
|
176
|
+
|
|
177
|
+
def subcubes(self, cubes: Iterable[GenericCube], only_largest: bool = True) -> Iterable[GenericCube]:
|
|
178
|
+
subcubes = [c for c in cubes if c in self and c.output != self.output]
|
|
179
|
+
if only_largest:
|
|
180
|
+
subsubcubes = [c for cube_list in [c.subcubes(cubes) for c in subcubes] for c in cube_list]
|
|
181
|
+
subcubes = [c for c in subcubes if c not in subsubcubes]
|
|
182
|
+
return subcubes
|
|
183
|
+
|
|
111
184
|
def _fit_dimension(self, dimension: dict[str, tuple[float, float]]) -> dict[str, tuple[float, float]]:
|
|
112
185
|
new_dimension: dict[str, tuple[float, float]] = {}
|
|
113
186
|
for key, value in dimension.items():
|
|
@@ -125,12 +198,11 @@ class HyperCube:
|
|
|
125
198
|
ds = dataset.to_numpy(copy=True)
|
|
126
199
|
return np.all((v[:, 0] <= ds) & (ds < v[:, 1]), axis=1)
|
|
127
200
|
|
|
128
|
-
def
|
|
201
|
+
def filter_dataframe(self, dataset: pd.DataFrame) -> pd.DataFrame:
|
|
129
202
|
return dataset[self.filter_indices(dataset)]
|
|
130
203
|
|
|
131
|
-
def _zip_dimensions(self,
|
|
132
|
-
return [ZippedDimension(dimension, self[dimension],
|
|
133
|
-
for dimension in self._dimensions.keys()]
|
|
204
|
+
def _zip_dimensions(self, other: HyperCube) -> list[ZippedDimension]:
|
|
205
|
+
return [ZippedDimension(dimension, self[dimension], other[dimension]) for dimension in self.dimensions]
|
|
134
206
|
|
|
135
207
|
def add_limit(self, limit_or_feature: Limit | str, direction: str = None) -> None:
|
|
136
208
|
if isinstance(limit_or_feature, Limit):
|
|
@@ -148,8 +220,8 @@ class HyperCube:
|
|
|
148
220
|
return '*'
|
|
149
221
|
raise Exception('Too many limits for this feature')
|
|
150
222
|
|
|
151
|
-
def create_samples(self, n: int = 1
|
|
152
|
-
return pd.DataFrame([self._create_tuple(
|
|
223
|
+
def create_samples(self, n: int = 1) -> pd.DataFrame:
|
|
224
|
+
return pd.DataFrame([self._create_tuple() for _ in range(n)])
|
|
153
225
|
|
|
154
226
|
@staticmethod
|
|
155
227
|
def check_overlap(to_check: Iterable[HyperCube], hypercubes: Iterable[HyperCube]) -> bool:
|
|
@@ -164,28 +236,38 @@ class HyperCube:
|
|
|
164
236
|
return False
|
|
165
237
|
|
|
166
238
|
def copy(self) -> HyperCube:
|
|
167
|
-
|
|
239
|
+
new_cube = HyperCube(self.dimensions.copy(), self._limits.copy(), self.output)
|
|
240
|
+
new_cube.copy_infinite_dimensions(self._infinite_dimensions)
|
|
241
|
+
return new_cube
|
|
168
242
|
|
|
169
243
|
def count(self, dataset: pd.DataFrame) -> int:
|
|
170
|
-
return self.
|
|
244
|
+
return self.filter_dataframe(dataset.iloc[:, :-1]).shape[0]
|
|
245
|
+
|
|
246
|
+
def interval_to_value(self, dimension, unscale=None):
|
|
247
|
+
if dimension not in self._infinite_dimensions:
|
|
248
|
+
return Between(unscale(self[dimension][0], dimension), unscale(self[dimension][1], dimension))
|
|
249
|
+
if len(self._infinite_dimensions[dimension]) == 2:
|
|
250
|
+
return
|
|
251
|
+
if '+' in self._infinite_dimensions[dimension]:
|
|
252
|
+
return GreaterThan(unscale(self[dimension][0], dimension))
|
|
253
|
+
if '-' in self._infinite_dimensions[dimension]:
|
|
254
|
+
return LessThan(unscale(self[dimension][1], dimension))
|
|
171
255
|
|
|
172
256
|
def body(self, variables: dict[str, Var], ignore: list[str], unscale=None, normalization=None) -> Iterable[Struct]:
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
for dimension in ignore:
|
|
177
|
-
del dimensions[dimension]
|
|
178
|
-
return [create_term(variables[name], Between(unscale(values[0], name), unscale(values[1], name)))
|
|
179
|
-
for name, values in dimensions.items()]
|
|
257
|
+
values = [(dim, self.interval_to_value(dim, unscale)) for dim in self.dimensions if dim not in ignore]
|
|
258
|
+
return [create_term(variables[name], value) for name, value in values
|
|
259
|
+
if not self.is_default and value is not None]
|
|
180
260
|
|
|
181
261
|
@staticmethod
|
|
182
|
-
def create_surrounding_cube(dataset: pd.DataFrame, closed: bool = False,
|
|
183
|
-
|
|
262
|
+
def create_surrounding_cube(dataset: pd.DataFrame, closed: bool = False, output=None,
|
|
263
|
+
features_to_ignore: Iterable[str] = []) -> GenericCube:
|
|
184
264
|
output = Target.CONSTANT if output is None else output
|
|
185
265
|
dimensions = {
|
|
186
266
|
column: (min(dataset[column]) - HyperCube.EPSILON * 2, max(dataset[column]) + HyperCube.EPSILON * 2)
|
|
187
267
|
for column in dataset.columns[:-1]
|
|
188
268
|
}
|
|
269
|
+
for column in features_to_ignore:
|
|
270
|
+
dimensions[column] = (-np.inf, np.inf)
|
|
189
271
|
if closed:
|
|
190
272
|
if output == Target.CONSTANT:
|
|
191
273
|
return ClosedCube(dimensions)
|
|
@@ -198,11 +280,11 @@ class HyperCube:
|
|
|
198
280
|
return RegressionCube(dimensions)
|
|
199
281
|
return HyperCube(dimensions)
|
|
200
282
|
|
|
201
|
-
def _create_tuple(self
|
|
202
|
-
return {k:
|
|
283
|
+
def _create_tuple(self) -> dict:
|
|
284
|
+
return {k: np.random.uniform(self[k][0], self[k][1]) for k in self._dimensions.keys()}
|
|
203
285
|
|
|
204
286
|
@staticmethod
|
|
205
|
-
def cube_from_point(point: dict, output=None) -> GenericCube:
|
|
287
|
+
def cube_from_point(point: dict[str, float], output=None) -> GenericCube:
|
|
206
288
|
if output is Target.CLASSIFICATION:
|
|
207
289
|
return ClassificationCube({k: (v, v) for k, v in list(point.items())[:-1]})
|
|
208
290
|
if output is Target.REGRESSION:
|
|
@@ -249,6 +331,7 @@ class HyperCube:
|
|
|
249
331
|
lambda a, b: a + b, [(dimension[1] - dimension[0]) ** 2 for dimension in self._dimensions.values()], 0
|
|
250
332
|
) ** 0.5
|
|
251
333
|
|
|
334
|
+
@property
|
|
252
335
|
def center(self) -> Point:
|
|
253
336
|
return Point(list(self._dimensions.keys()),
|
|
254
337
|
[(interval[0] + interval[1]) / 2 for interval in self._dimensions.values()])
|
|
@@ -258,6 +341,50 @@ class HyperCube:
|
|
|
258
341
|
Point(list(self._dimensions.keys()), values) for values in itertools.product(*self._dimensions.values())
|
|
259
342
|
]
|
|
260
343
|
|
|
344
|
+
def surface_distance(self, point: Point) -> float:
|
|
345
|
+
s = 0
|
|
346
|
+
for d in point.dimensions.keys():
|
|
347
|
+
lower, upper = self[d]
|
|
348
|
+
p = point[d]
|
|
349
|
+
if p > upper:
|
|
350
|
+
s += (p - upper)**2
|
|
351
|
+
elif p < lower:
|
|
352
|
+
s += (lower - p)**2
|
|
353
|
+
return s**0.5
|
|
354
|
+
|
|
355
|
+
def perimeter_samples(self, n: int = 5) -> Iterable[Point]:
|
|
356
|
+
def duplicate(point: Point, feature: str) -> Iterable[Point]:
|
|
357
|
+
new_point_a = point.copy()
|
|
358
|
+
new_point_b = point.copy()
|
|
359
|
+
new_point_a[feature] = self.get_first(feature)
|
|
360
|
+
new_point_b[feature] = self.get_second(feature)
|
|
361
|
+
return [new_point_a, new_point_b]
|
|
362
|
+
|
|
363
|
+
def remove_duplicates(points: Iterable[Point]) -> Iterable[Point]:
|
|
364
|
+
new_points = []
|
|
365
|
+
for point in points:
|
|
366
|
+
if point not in new_points:
|
|
367
|
+
new_points.append(point)
|
|
368
|
+
return new_points
|
|
369
|
+
|
|
370
|
+
def split(point: Point, feature: str, n: int):
|
|
371
|
+
points = []
|
|
372
|
+
a, b = self.get_first(feature), self.get_second(feature)
|
|
373
|
+
for value in np.linspace(a, b, n) if n > 1 else [(a + b) / 2]:
|
|
374
|
+
new_point = point.copy()
|
|
375
|
+
new_point[feature] = value
|
|
376
|
+
points.append(new_point)
|
|
377
|
+
return points
|
|
378
|
+
|
|
379
|
+
points = []
|
|
380
|
+
for primary in self._dimensions:
|
|
381
|
+
new_points = [Point([], [])]
|
|
382
|
+
for secondary in self._dimensions:
|
|
383
|
+
new_points = np.array([duplicate(point, secondary) if primary != secondary else
|
|
384
|
+
split(point, primary, n) for point in new_points]).flatten()
|
|
385
|
+
points = points + list(new_points)
|
|
386
|
+
return remove_duplicates(points)
|
|
387
|
+
|
|
261
388
|
def is_adjacent(self, cube: HyperCube) -> str | None:
|
|
262
389
|
adjacent = None
|
|
263
390
|
for (feature, [a1, b1]) in self._dimensions.items():
|
|
@@ -276,6 +403,15 @@ class HyperCube:
|
|
|
276
403
|
new_cube.update_dimension(feature, (min(a1, a2), max(b1, b2)))
|
|
277
404
|
return new_cube
|
|
278
405
|
|
|
406
|
+
def merge(self, other: HyperCube) -> HyperCube:
|
|
407
|
+
new_cube = self.copy()
|
|
408
|
+
for dimension in self.dimensions.keys():
|
|
409
|
+
new_cube = new_cube.merge_along_dimension(other, dimension)
|
|
410
|
+
return new_cube
|
|
411
|
+
|
|
412
|
+
def merge_with_point(self, other: Point) -> HyperCube:
|
|
413
|
+
return self.merge(HyperCube.cube_from_point(other.dimensions))
|
|
414
|
+
|
|
279
415
|
# TODO: maybe two different methods are more readable and easier to debug
|
|
280
416
|
def overlap(self, hypercubes: Iterable[HyperCube] | HyperCube) -> HyperCube | bool | None:
|
|
281
417
|
if isinstance(hypercubes, Iterable):
|
|
@@ -297,11 +433,16 @@ class HyperCube:
|
|
|
297
433
|
else:
|
|
298
434
|
self.update_dimension(feature, (lower, upper))
|
|
299
435
|
|
|
300
|
-
def update(self, dataset: pd.DataFrame, predictor) -> None:
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
436
|
+
def update(self, dataset: pd.DataFrame, predictor=None) -> None:
|
|
437
|
+
idx = self.filter_indices(dataset.iloc[:, :-1])
|
|
438
|
+
filtered = dataset.iloc[idx, :-1]
|
|
439
|
+
if len(filtered > 0):
|
|
440
|
+
predictions = dataset.iloc[idx, -1] if predictor is None else predictor.predict(filtered)
|
|
441
|
+
self._output = np.mean(predictions)
|
|
442
|
+
self._diversity = np.std(predictions)
|
|
443
|
+
self._error = (abs(predictions - self._output)).mean()
|
|
444
|
+
means = filtered.describe().loc['mean']
|
|
445
|
+
self._barycenter = Point(means.index.values, means.values)
|
|
305
446
|
|
|
306
447
|
# TODO: why this is not a property?
|
|
307
448
|
def init_diversity(self, std: float) -> None:
|
|
@@ -309,52 +450,102 @@ class HyperCube:
|
|
|
309
450
|
|
|
310
451
|
|
|
311
452
|
class RegressionCube(HyperCube):
|
|
312
|
-
def __init__(self, dimension: dict[str, tuple] = None):
|
|
313
|
-
super().__init__(dimension=dimension, output=LinearRegression())
|
|
453
|
+
def __init__(self, dimension: dict[str, tuple] = None, limits: set[Limit] = None, output=None):
|
|
454
|
+
super().__init__(dimension=dimension, limits=limits, output=LinearRegression() if output is None else output)
|
|
314
455
|
|
|
315
|
-
def update(self, dataset: pd.DataFrame, predictor) -> None:
|
|
316
|
-
|
|
456
|
+
def update(self, dataset: pd.DataFrame, predictor=None) -> None:
|
|
457
|
+
idx = self.filter_indices(dataset.iloc[:, :-1])
|
|
458
|
+
filtered = dataset.iloc[idx, :-1]
|
|
317
459
|
if len(filtered > 0):
|
|
318
|
-
predictions = predictor.predict(filtered)
|
|
460
|
+
predictions = dataset.iloc[idx, -1] if predictor is None else predictor.predict(filtered)
|
|
319
461
|
self._output.fit(filtered, predictions)
|
|
320
|
-
self._diversity = (abs(self._output.predict(filtered) - predictions)).mean()
|
|
462
|
+
self._diversity = self._error = (abs(self._output.predict(filtered) - predictions)).mean()
|
|
463
|
+
means = filtered.describe().loc['mean']
|
|
464
|
+
self._barycenter = Point(means.index.values, means.values)
|
|
321
465
|
|
|
322
466
|
def copy(self) -> RegressionCube:
|
|
323
|
-
|
|
467
|
+
output = LinearRegression()
|
|
468
|
+
try:
|
|
469
|
+
output.coef_ = self.output.coef_.copy()
|
|
470
|
+
output.intercept_ = self.output.intercept_
|
|
471
|
+
except AttributeError:
|
|
472
|
+
pass
|
|
473
|
+
new_cube = RegressionCube(self.dimensions.copy(), self._limits.copy(), output)
|
|
474
|
+
new_cube.copy_infinite_dimensions(self._infinite_dimensions)
|
|
475
|
+
return new_cube
|
|
324
476
|
|
|
325
477
|
def body(self, variables: dict[str, Var], ignore: list[str], unscale=None, normalization=None) -> Iterable[Struct]:
|
|
326
|
-
intercept = self.output.intercept_
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
478
|
+
intercept = self.output.intercept_
|
|
479
|
+
intercept = np.array(intercept).flatten()[0] if isinstance(intercept, Iterable) else intercept
|
|
480
|
+
intercept = intercept if normalization is None else unscale(sum(
|
|
481
|
+
[-self.output.coef_.flatten()[i] * normalization[name][0] / normalization[name][1] for i, name in
|
|
482
|
+
enumerate(self.dimensions.keys())], intercept), list(normalization.keys())[-1])
|
|
483
|
+
coefs = self.output.coef_.flatten() if normalization is None else [
|
|
484
|
+
self.output.coef_.flatten()[i] / normalization[name][1] * normalization[list(normalization.keys())[-1]][1]
|
|
485
|
+
for i, name in enumerate(self.dimensions.keys())
|
|
486
|
+
]
|
|
331
487
|
return list(super().body(variables, ignore, unscale, normalization)) + [linear_function_creator(
|
|
332
|
-
list(variables.values()), [to_rounded_real(v) for v in coefs],
|
|
333
|
-
to_rounded_real(intercept)
|
|
488
|
+
list(variables.values()), [to_rounded_real(v) for v in coefs], to_rounded_real(intercept)
|
|
334
489
|
)]
|
|
335
490
|
|
|
336
491
|
|
|
337
492
|
class ClassificationCube(HyperCube):
|
|
338
|
-
def __init__(self, dimension: dict[str, tuple] = None):
|
|
339
|
-
super().__init__(dimension=dimension)
|
|
493
|
+
def __init__(self, dimension: dict[str, tuple] = None, limits: set[Limit] = None, output: str = ""):
|
|
494
|
+
super().__init__(dimension=dimension, limits=limits, output=output)
|
|
340
495
|
|
|
341
|
-
def update(self, dataset: pd.DataFrame, predictor) -> None:
|
|
342
|
-
|
|
496
|
+
def update(self, dataset: pd.DataFrame, predictor=None) -> None:
|
|
497
|
+
idx = self.filter_indices(dataset.iloc[:, :-1])
|
|
498
|
+
filtered = dataset.iloc[idx, :-1]
|
|
343
499
|
if len(filtered > 0):
|
|
344
|
-
predictions = predictor.predict(filtered)
|
|
500
|
+
predictions = dataset.iloc[idx, -1] if predictor is None else predictor.predict(filtered)
|
|
345
501
|
self._output = mode(predictions)
|
|
346
|
-
self._diversity = 1 - sum(
|
|
502
|
+
self._diversity = self._error = 1 - sum(p == self.output for p in predictions) / len(predictions)
|
|
503
|
+
means = filtered.describe().loc['mean']
|
|
504
|
+
self._barycenter = Point(means.index.values, means.values)
|
|
347
505
|
|
|
348
506
|
def copy(self) -> ClassificationCube:
|
|
349
|
-
|
|
507
|
+
new_cube = ClassificationCube(self.dimensions.copy(), self._limits.copy(), self.output)
|
|
508
|
+
new_cube.copy_infinite_dimensions(self._infinite_dimensions)
|
|
509
|
+
return new_cube
|
|
350
510
|
|
|
351
511
|
|
|
352
512
|
class ClosedCube(HyperCube):
|
|
353
|
-
def __init__(self, dimension: dict[str, tuple] = None
|
|
354
|
-
|
|
513
|
+
def __init__(self, dimension: dict[str, tuple] = None, limits: set[Limit] = None,
|
|
514
|
+
output: str | LinearRegression | float = 0.0):
|
|
515
|
+
super().__init__(dimension=dimension, limits=limits, output=output)
|
|
355
516
|
|
|
356
|
-
def __contains__(self,
|
|
357
|
-
|
|
517
|
+
def __contains__(self, obj: dict[str, float] | ClosedCube) -> bool:
|
|
518
|
+
"""
|
|
519
|
+
Note that an object is inside a hypercube if ALL its dimensions' values satisfy:
|
|
520
|
+
min_dim <= object dimension <= max_dim
|
|
521
|
+
:param obj: an N-dimensional object (point or hypercube)
|
|
522
|
+
:return: true if the object is inside the hypercube, false otherwise
|
|
523
|
+
"""
|
|
524
|
+
if isinstance(obj, HyperCube):
|
|
525
|
+
for k in obj.dimensions:
|
|
526
|
+
if k not in self._infinite_dimensions:
|
|
527
|
+
if not (self.get_first(k) <= obj.get_first(k) <= obj.get_second(k) <= self.get_second(k)):
|
|
528
|
+
return False
|
|
529
|
+
elif len(self._infinite_dimensions[k]) == 2:
|
|
530
|
+
continue
|
|
531
|
+
elif '+' in self._infinite_dimensions[k] and self.get_first(k) > obj.get_first(k):
|
|
532
|
+
return False
|
|
533
|
+
elif '-' in self._infinite_dimensions[k] and obj.get_second(k) > self.get_second(k):
|
|
534
|
+
return False
|
|
535
|
+
elif isinstance(obj, dict):
|
|
536
|
+
for k, v in obj.items():
|
|
537
|
+
if k not in self._infinite_dimensions:
|
|
538
|
+
if not (self.get_first(k) <= v <= self.get_second(k)):
|
|
539
|
+
return False
|
|
540
|
+
elif len(self._infinite_dimensions[k]) == 2:
|
|
541
|
+
continue
|
|
542
|
+
elif '+' in self._infinite_dimensions[k] and self.get_first(k) > v:
|
|
543
|
+
return False
|
|
544
|
+
elif '-' in self._infinite_dimensions[k] and v > self.get_second(k):
|
|
545
|
+
return False
|
|
546
|
+
else:
|
|
547
|
+
raise TypeError("Invalid type for obj parameter")
|
|
548
|
+
return True
|
|
358
549
|
|
|
359
550
|
def filter_indices(self, dataset: pd.DataFrame) -> ndarray:
|
|
360
551
|
v = np.array([v for _, v in self._dimensions.items()])
|
|
@@ -362,23 +553,35 @@ class ClosedCube(HyperCube):
|
|
|
362
553
|
return np.all((v[:, 0] <= ds) & (ds <= v[:, 1]), axis=1)
|
|
363
554
|
|
|
364
555
|
def copy(self) -> ClosedCube:
|
|
365
|
-
|
|
556
|
+
new_cube = ClosedCube(self.dimensions.copy(), self._limits.copy(), self.output)
|
|
557
|
+
new_cube.copy_infinite_dimensions(self._infinite_dimensions)
|
|
558
|
+
return new_cube
|
|
366
559
|
|
|
367
560
|
|
|
368
561
|
class ClosedRegressionCube(ClosedCube, RegressionCube):
|
|
369
|
-
def __init__(self, dimension: dict[str, tuple] = None):
|
|
370
|
-
super().__init__(dimension=dimension)
|
|
562
|
+
def __init__(self, dimension: dict[str, tuple] = None, limits: set[Limit] = None, output=None):
|
|
563
|
+
super().__init__(dimension=dimension, limits=limits, output=LinearRegression() if output is None else output)
|
|
371
564
|
|
|
372
565
|
def copy(self) -> ClosedRegressionCube:
|
|
373
|
-
|
|
566
|
+
output = LinearRegression()
|
|
567
|
+
try:
|
|
568
|
+
output.coef_ = self.output.coef_.copy()
|
|
569
|
+
output.intercept_ = self.output.intercept_
|
|
570
|
+
except AttributeError:
|
|
571
|
+
pass
|
|
572
|
+
new_cube = ClosedRegressionCube(self.dimensions.copy(), self._limits.copy(), output)
|
|
573
|
+
new_cube.copy_infinite_dimensions(self._infinite_dimensions)
|
|
574
|
+
return new_cube
|
|
374
575
|
|
|
375
576
|
|
|
376
577
|
class ClosedClassificationCube(ClosedCube, ClassificationCube):
|
|
377
|
-
def __init__(self, dimension: dict[str, tuple] = None):
|
|
378
|
-
super().__init__(dimension=dimension)
|
|
578
|
+
def __init__(self, dimension: dict[str, tuple] = None, limits: set[Limit] = None, output: str = None):
|
|
579
|
+
super().__init__(dimension=dimension, limits=limits, output=output)
|
|
379
580
|
|
|
380
581
|
def copy(self) -> ClosedClassificationCube:
|
|
381
|
-
|
|
582
|
+
new_cube = ClosedClassificationCube(self.dimensions.copy(), self._limits.copy(), self.output)
|
|
583
|
+
new_cube.copy_infinite_dimensions(self._infinite_dimensions)
|
|
584
|
+
return new_cube
|
|
382
585
|
|
|
383
586
|
|
|
384
587
|
GenericCube = Union[HyperCube, ClassificationCube, RegressionCube,
|