psyke 0.4.9.dev6__py3-none-any.whl → 1.0.4.dev10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. psyke/__init__.py +231 -85
  2. psyke/clustering/__init__.py +9 -4
  3. psyke/clustering/cream/__init__.py +6 -10
  4. psyke/clustering/exact/__init__.py +17 -11
  5. psyke/clustering/utils.py +0 -1
  6. psyke/extraction/__init__.py +25 -0
  7. psyke/extraction/cart/CartPredictor.py +128 -0
  8. psyke/extraction/cart/FairTree.py +205 -0
  9. psyke/extraction/cart/FairTreePredictor.py +56 -0
  10. psyke/extraction/cart/__init__.py +48 -62
  11. psyke/extraction/hypercubic/__init__.py +187 -47
  12. psyke/extraction/hypercubic/cosmik/__init__.py +47 -0
  13. psyke/extraction/hypercubic/creepy/__init__.py +24 -29
  14. psyke/extraction/hypercubic/divine/__init__.py +86 -0
  15. psyke/extraction/hypercubic/ginger/__init__.py +100 -0
  16. psyke/extraction/hypercubic/gridex/__init__.py +45 -84
  17. psyke/extraction/hypercubic/gridrex/__init__.py +4 -4
  18. psyke/extraction/hypercubic/hex/__init__.py +104 -0
  19. psyke/extraction/hypercubic/hypercube.py +275 -72
  20. psyke/extraction/hypercubic/iter/__init__.py +45 -46
  21. psyke/extraction/hypercubic/strategy.py +13 -9
  22. psyke/extraction/real/__init__.py +24 -29
  23. psyke/extraction/real/utils.py +2 -2
  24. psyke/extraction/trepan/__init__.py +24 -19
  25. psyke/genetic/__init__.py +0 -0
  26. psyke/genetic/fgin/__init__.py +74 -0
  27. psyke/genetic/gin/__init__.py +144 -0
  28. psyke/hypercubepredictor.py +102 -0
  29. psyke/schema/__init__.py +230 -36
  30. psyke/tuning/__init__.py +40 -28
  31. psyke/tuning/crash/__init__.py +33 -64
  32. psyke/tuning/orchid/__init__.py +21 -23
  33. psyke/tuning/pedro/__init__.py +70 -56
  34. psyke/utils/logic.py +8 -8
  35. psyke/utils/plot.py +79 -3
  36. {psyke-0.4.9.dev6.dist-info → psyke-1.0.4.dev10.dist-info}/METADATA +42 -22
  37. psyke-1.0.4.dev10.dist-info/RECORD +46 -0
  38. {psyke-0.4.9.dev6.dist-info → psyke-1.0.4.dev10.dist-info}/WHEEL +1 -1
  39. {psyke-0.4.9.dev6.dist-info → psyke-1.0.4.dev10.dist-info/licenses}/LICENSE +2 -1
  40. psyke/extraction/cart/predictor.py +0 -73
  41. psyke-0.4.9.dev6.dist-info/RECORD +0 -36
  42. {psyke-0.4.9.dev6.dist-info → psyke-1.0.4.dev10.dist-info}/top_level.txt +0 -0

psyke/extraction/hypercubic/hypercube.py

@@ -8,12 +8,11 @@ import pandas as pd
  from numpy import ndarray
  
  from psyke.extraction.hypercubic.utils import Dimension, Dimensions, MinUpdate, ZippedDimension, Limit, Expansion
- from psyke.schema import Between
+ from psyke.schema import Between, GreaterThan, LessThan
  from psyke.utils import get_default_precision, get_int_precision, Target, get_default_random_seed
  from psyke.utils.logic import create_term, to_rounded_real, linear_function_creator
  from sklearn.linear_model import LinearRegression
  from tuprolog.core import Var, Struct
- from random import Random
  import numpy as np
  
  
@@ -30,49 +29,93 @@ class Point:
  
      EPSILON = get_default_precision()
  
-     def __init__(self, dimensions: list[str], values: list[float]):
+     def __init__(self, dimensions: list[str], values: list[float | str]):
          self._dimensions = {dimension: value for (dimension, value) in zip(dimensions, values)}
  
-     def __getitem__(self, feature: str) -> float:
+     def __getitem__(self, feature: str) -> float | str:
          if feature in self._dimensions.keys():
              return self._dimensions[feature]
          else:
              raise FeatureNotFoundException(feature)
  
-     def __setitem__(self, key: str, value: float) -> None:
+     def __setitem__(self, key: str, value: float | str) -> None:
          self._dimensions[key] = value
  
      def __eq__(self, other: Point) -> bool:
          return all([abs(self[dimension] - other[dimension]) < Point.EPSILON for dimension in self._dimensions])
  
+     def distance(self, other: Point, metric: str='Euclidean') -> float:
+         distances = [abs(self[dimension] - other[dimension]) for dimension in self._dimensions]
+         if metric == 'Euclidean':
+             distance = sum(np.array(distances)**2)**0.5
+         elif metric == 'Manhattan':
+             distance = sum(distances)
+         else:
+             raise ValueError("metric should be 'Euclidean' or 'Manhattan'")
+         return distance
+
      @property
-     def dimensions(self) -> dict[str, float]:
+     def dimensions(self) -> dict[str, float | str]:
          return self._dimensions
  
+     def to_dataframe(self) -> pd.DataFrame:
+         return pd.DataFrame(data=[self.dimensions.values()], columns=list(self.dimensions.keys()))
+
+     def copy(self) -> Point:
+         return Point(list(self._dimensions.keys()), list(self._dimensions.values()))
+
  
  class HyperCube:
      """
-     An N-dimensional cube holding a numeric value.
+     An N-dimensional cube holding an output numeric value.
      """
  
      EPSILON = get_default_precision() # Precision used when comparing two hypercubes
      INT_PRECISION = get_int_precision()
  
      def __init__(self, dimension: dict[str, tuple[float, float]] = None, limits: set[Limit] = None,
-                  output: float | LinearRegression = 0.0):
+                  output: float | LinearRegression | str = 0.0):
          self._dimensions = self._fit_dimension(dimension) if dimension is not None else {}
          self._limits = limits if limits is not None else set()
          self._output = output
          self._diversity = 0.0
+         self._error = 0.0
+         self._barycenter = Point([], [])
+         self._default = False
+         self._infinite_dimensions = {}
  
-     def __contains__(self, point: dict[str, float]) -> bool:
+     def __contains__(self, obj: dict[str, float] | HyperCube) -> bool:
          """
-         Note that a point (dict[str, float]) is inside a hypercube if ALL its dimensions' values satisfy:
-         min_dim <= value < max_dim
-         :param point: an N-dimensional point
-         :return: true if the point is inside the hypercube, false otherwise
+         Note that a point is inside a hypercube if ALL its dimensions' values satisfy:
+         min_dim <= object dimension < max_dim
+         :param obj: an N-dimensional object (point or hypercube)
+         :return: true if the object is inside the hypercube, false otherwise
          """
-         return all([(self.get_first(k) <= v < self.get_second(k)) for k, v in point.items()])
+         if isinstance(obj, HyperCube):
+             for k in obj.dimensions:
+                 if k not in self._infinite_dimensions:
+                     if not (self.get_first(k) <= obj.get_first(k) <= obj.get_second(k) < self.get_second(k)):
+                         return False
+                 elif len(self._infinite_dimensions[k]) == 2:
+                     continue
+                 elif '+' in self._infinite_dimensions[k] and self.get_first(k) > obj.get_first(k):
+                     return False
+                 elif '-' in self._infinite_dimensions[k] and obj.get_second(k) >= self.get_second(k):
+                     return False
+         elif isinstance(obj, dict):
+             for k, v in obj.items():
+                 if k not in self._infinite_dimensions:
+                     if not (self.get_first(k) <= v < self.get_second(k)):
+                         return False
+                 elif len(self._infinite_dimensions[k]) == 2:
+                     continue
+                 elif '+' in self._infinite_dimensions[k] and self.get_first(k) > v:
+                     return False
+                 elif '-' in self._infinite_dimensions[k] and v >= self.get_second(k):
+                     return False
+         else:
+             raise TypeError("Invalid type for obj parameter")
+         return True
  
      def __eq__(self, other: HyperCube) -> bool:
          return all([(abs(dimension.this_dimension[0] - dimension.other_dimension[0]) < HyperCube.EPSILON)
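The new Point.distance metric argument and the hypercube-aware __contains__ shown above can be exercised as in the following minimal sketch (not part of the diff); the import path and feature names are assumptions based on the file being psyke/extraction/hypercubic/hypercube.py.

    # Minimal sketch: assumed import path, hypothetical feature names and values.
    from psyke.extraction.hypercubic.hypercube import HyperCube, Point

    a = Point(['x', 'y'], [0.0, 0.0])
    b = Point(['x', 'y'], [3.0, 4.0])
    print(a.distance(b))                      # 5.0, Euclidean by default
    print(a.distance(b, metric='Manhattan'))  # 7.0

    cube = HyperCube({'x': (0.0, 1.0), 'y': (0.0, 1.0)})
    print({'x': 0.5, 'y': 0.5} in cube)       # True: min <= value < max on every dimension
    print({'x': 1.0, 'y': 0.5} in cube)       # False: the upper bound is excluded
    print(HyperCube({'x': (0.2, 0.8), 'y': (0.2, 0.8)}) in cube)  # True: a cube that fits strictly inside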
@@ -92,6 +135,21 @@ class HyperCube:
          result = [hash(name + str(dimension[0]) + str(dimension[1])) for name, dimension in self.dimensions.items()]
          return sum(result)
  
+     @property
+     def is_default(self) -> bool:
+         return self._default
+
+     def set_default(self):
+         self._default = True
+
+     def set_infinite(self, dimension: str, direction: str):
+         if dimension not in self._infinite_dimensions:
+             self._infinite_dimensions[dimension] = set()
+         self._infinite_dimensions[dimension].add(direction)
+
+     def copy_infinite_dimensions(self, dimensions: dict[str, str]):
+         self._infinite_dimensions = dimensions.copy()
+
      @property
      def dimensions(self) -> Dimensions:
          return self._dimensions
@@ -101,13 +159,28 @@ class HyperCube:
          return len(self._limits)
  
      @property
-     def output(self) -> float | LinearRegression:
+     def output(self) -> float | str | LinearRegression:
          return self._output
  
      @property
      def diversity(self) -> float:
          return self._diversity
  
+     @property
+     def error(self) -> float:
+         return self._error
+
+     @property
+     def barycenter(self) -> Point:
+         return self._barycenter
+
+     def subcubes(self, cubes: Iterable[GenericCube], only_largest: bool = True) -> Iterable[GenericCube]:
+         subcubes = [c for c in cubes if c in self and c.output != self.output]
+         if only_largest:
+             subsubcubes = [c for cube_list in [c.subcubes(cubes) for c in subcubes] for c in cube_list]
+             subcubes = [c for c in subcubes if c not in subsubcubes]
+         return subcubes
+
      def _fit_dimension(self, dimension: dict[str, tuple[float, float]]) -> dict[str, tuple[float, float]]:
          new_dimension: dict[str, tuple[float, float]] = {}
          for key, value in dimension.items():
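A sketch of the new subcubes helper (not part of the diff; import path assumed as above): it keeps only contained cubes whose output differs, and by default drops cubes that are themselves nested in a larger sub-cube.

    # Minimal sketch with hypothetical one-dimensional cubes.
    from psyke.extraction.hypercubic.hypercube import HyperCube

    outer = HyperCube({'x': (0.0, 10.0)}, output=0.0)
    inner = HyperCube({'x': (2.0, 3.0)}, output=1.0)
    nested = HyperCube({'x': (2.2, 2.4)}, output=2.0)

    print(len(outer.subcubes([inner, nested])))                      # 1: only 'inner'; 'nested' lies inside it
    print(len(outer.subcubes([inner, nested], only_largest=False)))  # 2: both differing sub-cubes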
@@ -125,12 +198,11 @@ class HyperCube:
          ds = dataset.to_numpy(copy=True)
          return np.all((v[:, 0] <= ds) & (ds < v[:, 1]), axis=1)
  
-     def _filter_dataframe(self, dataset: pd.DataFrame) -> pd.DataFrame:
+     def filter_dataframe(self, dataset: pd.DataFrame) -> pd.DataFrame:
          return dataset[self.filter_indices(dataset)]
  
-     def _zip_dimensions(self, hypercube: HyperCube) -> list[ZippedDimension]:
-         return [ZippedDimension(dimension, self[dimension], hypercube[dimension])
-                 for dimension in self._dimensions.keys()]
+     def _zip_dimensions(self, other: HyperCube) -> list[ZippedDimension]:
+         return [ZippedDimension(dimension, self[dimension], other[dimension]) for dimension in self.dimensions]
  
      def add_limit(self, limit_or_feature: Limit | str, direction: str = None) -> None:
          if isinstance(limit_or_feature, Limit):
@@ -148,8 +220,8 @@ class HyperCube:
              return '*'
          raise Exception('Too many limits for this feature')
  
-     def create_samples(self, n: int = 1, generator: Random = Random(get_default_random_seed())) -> pd.DataFrame:
-         return pd.DataFrame([self._create_tuple(generator) for _ in range(n)])
+     def create_samples(self, n: int = 1) -> pd.DataFrame:
+         return pd.DataFrame([self._create_tuple() for _ in range(n)])
  
      @staticmethod
      def check_overlap(to_check: Iterable[HyperCube], hypercubes: Iterable[HyperCube]) -> bool:
@@ -164,28 +236,38 @@ class HyperCube:
          return False
  
      def copy(self) -> HyperCube:
-         return HyperCube(self.dimensions.copy(), self._limits.copy(), self.output)
+         new_cube = HyperCube(self.dimensions.copy(), self._limits.copy(), self.output)
+         new_cube.copy_infinite_dimensions(self._infinite_dimensions)
+         return new_cube
  
      def count(self, dataset: pd.DataFrame) -> int:
-         return self._filter_dataframe(dataset.iloc[:, :-1]).shape[0]
+         return self.filter_dataframe(dataset.iloc[:, :-1]).shape[0]
+
+     def interval_to_value(self, dimension, unscale=None):
+         if dimension not in self._infinite_dimensions:
+             return Between(unscale(self[dimension][0], dimension), unscale(self[dimension][1], dimension))
+         if len(self._infinite_dimensions[dimension]) == 2:
+             return
+         if '+' in self._infinite_dimensions[dimension]:
+             return GreaterThan(unscale(self[dimension][0], dimension))
+         if '-' in self._infinite_dimensions[dimension]:
+             return LessThan(unscale(self[dimension][1], dimension))
  
      def body(self, variables: dict[str, Var], ignore: list[str], unscale=None, normalization=None) -> Iterable[Struct]:
-         dimensions = dict(self.dimensions)
-         # TODO: there is something strange in the tests here
-         # print('search', [name for name in dimensions.keys()], 'in', (variables.keys()))
-         for dimension in ignore:
-             del dimensions[dimension]
-         return [create_term(variables[name], Between(unscale(values[0], name), unscale(values[1], name)))
-                 for name, values in dimensions.items()]
+         values = [(dim, self.interval_to_value(dim, unscale)) for dim in self.dimensions if dim not in ignore]
+         return [create_term(variables[name], value) for name, value in values
+                 if not self.is_default and value is not None]
  
      @staticmethod
-     def create_surrounding_cube(dataset: pd.DataFrame, closed: bool = False,
-                                 output=None) -> GenericCube:
+     def create_surrounding_cube(dataset: pd.DataFrame, closed: bool = False, output=None,
+                                 features_to_ignore: Iterable[str] = []) -> GenericCube:
          output = Target.CONSTANT if output is None else output
          dimensions = {
              column: (min(dataset[column]) - HyperCube.EPSILON * 2, max(dataset[column]) + HyperCube.EPSILON * 2)
              for column in dataset.columns[:-1]
          }
+         for column in features_to_ignore:
+             dimensions[column] = (-np.inf, np.inf)
          if closed:
              if output == Target.CONSTANT:
                  return ClosedCube(dimensions)
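A sketch of how set_infinite feeds interval_to_value when rule bodies are built (not part of the diff): dimensions marked unbounded map to GreaterThan/LessThan instead of Between, and fully unbounded ones are dropped. The identity unscale below is a placeholder for the normalization callback.

    # Minimal sketch: assumed import path, hypothetical features, identity unscale.
    from psyke.extraction.hypercubic.hypercube import HyperCube

    cube = HyperCube({'x': (0.0, 1.0), 'y': (-2.0, 5.0)})
    cube.set_infinite('y', '-')           # treat 'y' as unbounded below

    unscale = lambda value, feature: value

    cube.interval_to_value('x', unscale)  # a Between(0.0, 1.0) constraint
    cube.interval_to_value('y', unscale)  # a LessThan(5.0) constraint: only the finite upper bound is kept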
@@ -198,11 +280,11 @@ class HyperCube:
                  return RegressionCube(dimensions)
          return HyperCube(dimensions)
  
-     def _create_tuple(self, generator: Random) -> dict:
-         return {k: generator.uniform(self.get_first(k), self.get_second(k)) for k in self._dimensions.keys()}
+     def _create_tuple(self) -> dict:
+         return {k: np.random.uniform(self[k][0], self[k][1]) for k in self._dimensions.keys()}
  
      @staticmethod
-     def cube_from_point(point: dict, output=None) -> GenericCube:
+     def cube_from_point(point: dict[str, float], output=None) -> GenericCube:
          if output is Target.CLASSIFICATION:
              return ClassificationCube({k: (v, v) for k, v in list(point.items())[:-1]})
          if output is Target.REGRESSION:
@@ -249,6 +331,7 @@ class HyperCube:
              lambda a, b: a + b, [(dimension[1] - dimension[0]) ** 2 for dimension in self._dimensions.values()], 0
          ) ** 0.5
  
+     @property
      def center(self) -> Point:
          return Point(list(self._dimensions.keys()),
                       [(interval[0] + interval[1]) / 2 for interval in self._dimensions.values()])
@@ -258,6 +341,50 @@ class HyperCube:
              Point(list(self._dimensions.keys()), values) for values in itertools.product(*self._dimensions.values())
          ]
  
+     def surface_distance(self, point: Point) -> float:
+         s = 0
+         for d in point.dimensions.keys():
+             lower, upper = self[d]
+             p = point[d]
+             if p > upper:
+                 s += (p - upper)**2
+             elif p < lower:
+                 s += (lower - p)**2
+         return s**0.5
+
+     def perimeter_samples(self, n: int = 5) -> Iterable[Point]:
+         def duplicate(point: Point, feature: str) -> Iterable[Point]:
+             new_point_a = point.copy()
+             new_point_b = point.copy()
+             new_point_a[feature] = self.get_first(feature)
+             new_point_b[feature] = self.get_second(feature)
+             return [new_point_a, new_point_b]
+
+         def remove_duplicates(points: Iterable[Point]) -> Iterable[Point]:
+             new_points = []
+             for point in points:
+                 if point not in new_points:
+                     new_points.append(point)
+             return new_points
+
+         def split(point: Point, feature: str, n: int):
+             points = []
+             a, b = self.get_first(feature), self.get_second(feature)
+             for value in np.linspace(a, b, n) if n > 1 else [(a + b) / 2]:
+                 new_point = point.copy()
+                 new_point[feature] = value
+                 points.append(new_point)
+             return points
+
+         points = []
+         for primary in self._dimensions:
+             new_points = [Point([], [])]
+             for secondary in self._dimensions:
+                 new_points = np.array([duplicate(point, secondary) if primary != secondary else
+                                        split(point, primary, n) for point in new_points]).flatten()
+             points = points + list(new_points)
+         return remove_duplicates(points)
+
      def is_adjacent(self, cube: HyperCube) -> str | None:
          adjacent = None
          for (feature, [a1, b1]) in self._dimensions.items():
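A sketch of the new geometric helpers above (not part of the diff): surface_distance is zero for points inside the cube and grows with the excess along each coordinate, while perimeter_samples spreads points over the cube's faces.

    # Minimal sketch: assumed import path, hypothetical features.
    from psyke.extraction.hypercubic.hypercube import HyperCube, Point

    cube = HyperCube({'x': (0.0, 1.0), 'y': (0.0, 1.0)})

    p = Point(['x', 'y'], [2.0, 0.5])
    print(cube.surface_distance(p))    # 1.0: only the excess along 'x' contributes

    samples = cube.perimeter_samples(n=3)
    print(len(list(samples)))          # number of distinct face points (duplicates removed)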
@@ -276,6 +403,15 @@ class HyperCube:
          new_cube.update_dimension(feature, (min(a1, a2), max(b1, b2)))
          return new_cube
  
+     def merge(self, other: HyperCube) -> HyperCube:
+         new_cube = self.copy()
+         for dimension in self.dimensions.keys():
+             new_cube = new_cube.merge_along_dimension(other, dimension)
+         return new_cube
+
+     def merge_with_point(self, other: Point) -> HyperCube:
+         return self.merge(HyperCube.cube_from_point(other.dimensions))
+
      # TODO: maybe two different methods are more readable and easier to debug
      def overlap(self, hypercubes: Iterable[HyperCube] | HyperCube) -> HyperCube | bool | None:
          if isinstance(hypercubes, Iterable):
@@ -297,11 +433,16 @@ class HyperCube:
          else:
              self.update_dimension(feature, (lower, upper))
  
-     def update(self, dataset: pd.DataFrame, predictor) -> None:
-         filtered = self._filter_dataframe(dataset.iloc[:, :-1])
-         predictions = predictor.predict(filtered)
-         self._output = np.mean(predictions)
-         self._diversity = np.std(predictions)
+     def update(self, dataset: pd.DataFrame, predictor=None) -> None:
+         idx = self.filter_indices(dataset.iloc[:, :-1])
+         filtered = dataset.iloc[idx, :-1]
+         if len(filtered > 0):
+             predictions = dataset.iloc[idx, -1] if predictor is None else predictor.predict(filtered)
+             self._output = np.mean(predictions)
+             self._diversity = np.std(predictions)
+             self._error = (abs(predictions - self._output)).mean()
+             means = filtered.describe().loc['mean']
+             self._barycenter = Point(means.index.values, means.values)
  
      # TODO: why this is not a property?
      def init_diversity(self, std: float) -> None:
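With predictor now optional, update takes the output statistics either from a fitted model or directly from the last column of the dataset. A minimal sketch (not part of the diff), using a hypothetical two-feature frame:

    # Minimal sketch: assumed import path, hypothetical data.
    import pandas as pd
    from psyke.extraction.hypercubic.hypercube import HyperCube

    data = pd.DataFrame({'x': [0.1, 0.4, 0.9], 'y': [0.2, 0.5, 0.8], 'target': [1.0, 2.0, 3.0]})
    cube = HyperCube.create_surrounding_cube(data)

    cube.update(data)  # no predictor: mean/std of 'target' inside the cube
    print(cube.output, cube.diversity, cube.error, cube.barycenter.dimensions)

    # With a trained model instead: cube.update(data, predictor=fitted_model)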
@@ -309,52 +450,102 @@ class HyperCube:
  
  
  class RegressionCube(HyperCube):
-     def __init__(self, dimension: dict[str, tuple] = None):
-         super().__init__(dimension=dimension, output=LinearRegression())
+     def __init__(self, dimension: dict[str, tuple] = None, limits: set[Limit] = None, output=None):
+         super().__init__(dimension=dimension, limits=limits, output=LinearRegression() if output is None else output)
  
-     def update(self, dataset: pd.DataFrame, predictor) -> None:
-         filtered = self._filter_dataframe(dataset.iloc[:, :-1])
+     def update(self, dataset: pd.DataFrame, predictor=None) -> None:
+         idx = self.filter_indices(dataset.iloc[:, :-1])
+         filtered = dataset.iloc[idx, :-1]
          if len(filtered > 0):
-             predictions = predictor.predict(filtered)
+             predictions = dataset.iloc[idx, -1] if predictor is None else predictor.predict(filtered)
              self._output.fit(filtered, predictions)
-             self._diversity = (abs(self._output.predict(filtered) - predictions)).mean()
+             self._diversity = self._error = (abs(self._output.predict(filtered) - predictions)).mean()
+             means = filtered.describe().loc['mean']
+             self._barycenter = Point(means.index.values, means.values)
  
      def copy(self) -> RegressionCube:
-         return RegressionCube(self.dimensions.copy())
+         output = LinearRegression()
+         try:
+             output.coef_ = self.output.coef_.copy()
+             output.intercept_ = self.output.intercept_
+         except AttributeError:
+             pass
+         new_cube = RegressionCube(self.dimensions.copy(), self._limits.copy(), output)
+         new_cube.copy_infinite_dimensions(self._infinite_dimensions)
+         return new_cube
  
      def body(self, variables: dict[str, Var], ignore: list[str], unscale=None, normalization=None) -> Iterable[Struct]:
-         intercept = self.output.intercept_ if normalization is None else \
-             unscale(sum([-self.output.coef_[i] * normalization[name][0] / normalization[name][1] for i, name in
-                          enumerate(self.dimensions.keys())], self.output.intercept_), list(normalization.keys())[-1])
-         coefs = self.output.coef_ if normalization is None else \
-             [self.output.coef_[i] / normalization[name][1] for i, name in enumerate(self.dimensions.keys())]
+         intercept = self.output.intercept_
+         intercept = np.array(intercept).flatten()[0] if isinstance(intercept, Iterable) else intercept
+         intercept = intercept if normalization is None else unscale(sum(
+             [-self.output.coef_.flatten()[i] * normalization[name][0] / normalization[name][1] for i, name in
+              enumerate(self.dimensions.keys())], intercept), list(normalization.keys())[-1])
+         coefs = self.output.coef_.flatten() if normalization is None else [
+             self.output.coef_.flatten()[i] / normalization[name][1] * normalization[list(normalization.keys())[-1]][1]
+             for i, name in enumerate(self.dimensions.keys())
+         ]
          return list(super().body(variables, ignore, unscale, normalization)) + [linear_function_creator(
-             list(variables.values()), [to_rounded_real(v) for v in coefs],
-             to_rounded_real(intercept)
+             list(variables.values()), [to_rounded_real(v) for v in coefs], to_rounded_real(intercept)
          )]
  
  
  class ClassificationCube(HyperCube):
-     def __init__(self, dimension: dict[str, tuple] = None):
-         super().__init__(dimension=dimension)
+     def __init__(self, dimension: dict[str, tuple] = None, limits: set[Limit] = None, output: str = ""):
+         super().__init__(dimension=dimension, limits=limits, output=output)
  
-     def update(self, dataset: pd.DataFrame, predictor) -> None:
-         filtered = self._filter_dataframe(dataset.iloc[:, :-1])
+     def update(self, dataset: pd.DataFrame, predictor=None) -> None:
+         idx = self.filter_indices(dataset.iloc[:, :-1])
+         filtered = dataset.iloc[idx, :-1]
          if len(filtered > 0):
-             predictions = predictor.predict(filtered)
+             predictions = dataset.iloc[idx, -1] if predictor is None else predictor.predict(filtered)
              self._output = mode(predictions)
-             self._diversity = 1 - sum(prediction == self.output for prediction in predictions) / len(filtered)
+             self._diversity = self._error = 1 - sum(p == self.output for p in predictions) / len(predictions)
+             means = filtered.describe().loc['mean']
+             self._barycenter = Point(means.index.values, means.values)
  
      def copy(self) -> ClassificationCube:
-         return ClassificationCube(self.dimensions.copy())
+         new_cube = ClassificationCube(self.dimensions.copy(), self._limits.copy(), self.output)
+         new_cube.copy_infinite_dimensions(self._infinite_dimensions)
+         return new_cube
  
  
  class ClosedCube(HyperCube):
-     def __init__(self, dimension: dict[str, tuple] = None):
-         super().__init__(dimension=dimension)
+     def __init__(self, dimension: dict[str, tuple] = None, limits: set[Limit] = None,
+                  output: str | LinearRegression | float = 0.0):
+         super().__init__(dimension=dimension, limits=limits, output=output)
  
-     def __contains__(self, point: dict[str, float]) -> bool:
-         return all([(self.get_first(k) <= v <= self.get_second(k)) for k, v in point.items()])
+     def __contains__(self, obj: dict[str, float] | ClosedCube) -> bool:
+         """
+         Note that an object is inside a hypercube if ALL its dimensions' values satisfy:
+         min_dim <= object dimension <= max_dim
+         :param obj: an N-dimensional object (point or hypercube)
+         :return: true if the object is inside the hypercube, false otherwise
+         """
+         if isinstance(obj, HyperCube):
+             for k in obj.dimensions:
+                 if k not in self._infinite_dimensions:
+                     if not (self.get_first(k) <= obj.get_first(k) <= obj.get_second(k) <= self.get_second(k)):
+                         return False
+                 elif len(self._infinite_dimensions[k]) == 2:
+                     continue
+                 elif '+' in self._infinite_dimensions[k] and self.get_first(k) > obj.get_first(k):
+                     return False
+                 elif '-' in self._infinite_dimensions[k] and obj.get_second(k) > self.get_second(k):
+                     return False
+         elif isinstance(obj, dict):
+             for k, v in obj.items():
+                 if k not in self._infinite_dimensions:
+                     if not (self.get_first(k) <= v <= self.get_second(k)):
+                         return False
+                 elif len(self._infinite_dimensions[k]) == 2:
+                     continue
+                 elif '+' in self._infinite_dimensions[k] and self.get_first(k) > v:
+                     return False
+                 elif '-' in self._infinite_dimensions[k] and v > self.get_second(k):
+                     return False
+         else:
+             raise TypeError("Invalid type for obj parameter")
+         return True
  
      def filter_indices(self, dataset: pd.DataFrame) -> ndarray:
          v = np.array([v for _, v in self._dimensions.items()])
@@ -362,23 +553,35 @@ class ClosedCube(HyperCube):
          return np.all((v[:, 0] <= ds) & (ds <= v[:, 1]), axis=1)
  
      def copy(self) -> ClosedCube:
-         return ClosedCube(self.dimensions.copy())
+         new_cube = ClosedCube(self.dimensions.copy(), self._limits.copy(), self.output)
+         new_cube.copy_infinite_dimensions(self._infinite_dimensions)
+         return new_cube
  
  
  class ClosedRegressionCube(ClosedCube, RegressionCube):
-     def __init__(self, dimension: dict[str, tuple] = None):
-         super().__init__(dimension=dimension)
+     def __init__(self, dimension: dict[str, tuple] = None, limits: set[Limit] = None, output=None):
+         super().__init__(dimension=dimension, limits=limits, output=LinearRegression() if output is None else output)
  
      def copy(self) -> ClosedRegressionCube:
-         return ClosedRegressionCube(self.dimensions.copy())
+         output = LinearRegression()
+         try:
+             output.coef_ = self.output.coef_.copy()
+             output.intercept_ = self.output.intercept_
+         except AttributeError:
+             pass
+         new_cube = ClosedRegressionCube(self.dimensions.copy(), self._limits.copy(), output)
+         new_cube.copy_infinite_dimensions(self._infinite_dimensions)
+         return new_cube
  
  
  class ClosedClassificationCube(ClosedCube, ClassificationCube):
-     def __init__(self, dimension: dict[str, tuple] = None):
-         super().__init__(dimension=dimension)
+     def __init__(self, dimension: dict[str, tuple] = None, limits: set[Limit] = None, output: str = None):
+         super().__init__(dimension=dimension, limits=limits, output=output)
  
      def copy(self) -> ClosedClassificationCube:
-         return ClosedClassificationCube(self.dimensions.copy())
+         new_cube = ClosedClassificationCube(self.dimensions.copy(), self._limits.copy(), self.output)
+         new_cube.copy_infinite_dimensions(self._infinite_dimensions)
+         return new_cube
  
  
  GenericCube = Union[HyperCube, ClassificationCube, RegressionCube,
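The closed variants keep an inclusive upper bound, in contrast with the half-open base class. A minimal sketch (not part of the diff; import path assumed as above):

    from psyke.extraction.hypercubic.hypercube import ClosedCube, HyperCube

    open_cube = HyperCube({'x': (0.0, 1.0)})
    closed_cube = ClosedCube({'x': (0.0, 1.0)})

    print({'x': 1.0} in open_cube)    # False: min <= v < max
    print({'x': 1.0} in closed_cube)  # True:  min <= v <= max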