psyke 0.8.14.dev6__tar.gz → 0.9.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: the registry flags this version of psyke as potentially problematic.

Files changed (81)
  1. {psyke-0.8.14.dev6/psyke.egg-info → psyke-0.9.0}/PKG-INFO +1 -1
  2. psyke-0.9.0/VERSION +1 -0
  3. {psyke-0.8.14.dev6 → psyke-0.9.0}/psyke/__init__.py +98 -23
  4. psyke-0.9.0/psyke/extraction/__init__.py +51 -0
  5. psyke-0.8.14.dev6/psyke/extraction/cart/predictor.py → psyke-0.9.0/psyke/extraction/cart/CartPredictor.py +49 -4
  6. psyke-0.9.0/psyke/extraction/cart/FairTree.py +196 -0
  7. psyke-0.9.0/psyke/extraction/cart/FairTreePredictor.py +62 -0
  8. psyke-0.9.0/psyke/extraction/cart/__init__.py +71 -0
  9. {psyke-0.8.14.dev6 → psyke-0.9.0}/psyke/extraction/hypercubic/__init__.py +10 -3
  10. {psyke-0.8.14.dev6 → psyke-0.9.0}/psyke/extraction/hypercubic/creepy/__init__.py +1 -1
  11. {psyke-0.8.14.dev6 → psyke-0.9.0}/psyke/extraction/hypercubic/gridex/__init__.py +3 -0
  12. {psyke-0.8.14.dev6 → psyke-0.9.0}/psyke/extraction/hypercubic/iter/__init__.py +5 -0
  13. {psyke-0.8.14.dev6 → psyke-0.9.0}/psyke/extraction/hypercubic/strategy.py +13 -9
  14. {psyke-0.8.14.dev6 → psyke-0.9.0}/psyke/tuning/pedro/__init__.py +4 -2
  15. {psyke-0.8.14.dev6 → psyke-0.9.0}/psyke/utils/logic.py +1 -1
  16. {psyke-0.8.14.dev6 → psyke-0.9.0/psyke.egg-info}/PKG-INFO +1 -1
  17. {psyke-0.8.14.dev6 → psyke-0.9.0}/psyke.egg-info/SOURCES.txt +3 -1
  18. psyke-0.8.14.dev6/VERSION +0 -1
  19. psyke-0.8.14.dev6/psyke/extraction/__init__.py +0 -21
  20. psyke-0.8.14.dev6/psyke/extraction/cart/__init__.py +0 -96
  21. {psyke-0.8.14.dev6 → psyke-0.9.0}/LICENSE +0 -0
  22. {psyke-0.8.14.dev6 → psyke-0.9.0}/MANIFEST.in +0 -0
  23. {psyke-0.8.14.dev6 → psyke-0.9.0}/README.md +0 -0
  24. {psyke-0.8.14.dev6 → psyke-0.9.0}/psyke/clustering/__init__.py +0 -0
  25. {psyke-0.8.14.dev6 → psyke-0.9.0}/psyke/clustering/cream/__init__.py +0 -0
  26. {psyke-0.8.14.dev6 → psyke-0.9.0}/psyke/clustering/exact/__init__.py +0 -0
  27. {psyke-0.8.14.dev6 → psyke-0.9.0}/psyke/clustering/utils.py +0 -0
  28. {psyke-0.8.14.dev6 → psyke-0.9.0}/psyke/extraction/hypercubic/cosmik/__init__.py +0 -0
  29. {psyke-0.8.14.dev6 → psyke-0.9.0}/psyke/extraction/hypercubic/divine/__init__.py +0 -0
  30. {psyke-0.8.14.dev6 → psyke-0.9.0}/psyke/extraction/hypercubic/gridrex/__init__.py +0 -0
  31. {psyke-0.8.14.dev6 → psyke-0.9.0}/psyke/extraction/hypercubic/hex/__init__.py +0 -0
  32. {psyke-0.8.14.dev6 → psyke-0.9.0}/psyke/extraction/hypercubic/hypercube.py +0 -0
  33. {psyke-0.8.14.dev6 → psyke-0.9.0}/psyke/extraction/hypercubic/utils.py +0 -0
  34. {psyke-0.8.14.dev6 → psyke-0.9.0}/psyke/extraction/real/__init__.py +0 -0
  35. {psyke-0.8.14.dev6 → psyke-0.9.0}/psyke/extraction/real/utils.py +0 -0
  36. {psyke-0.8.14.dev6 → psyke-0.9.0}/psyke/extraction/trepan/__init__.py +0 -0
  37. {psyke-0.8.14.dev6 → psyke-0.9.0}/psyke/extraction/trepan/utils.py +0 -0
  38. {psyke-0.8.14.dev6 → psyke-0.9.0}/psyke/hypercubepredictor.py +0 -0
  39. {psyke-0.8.14.dev6 → psyke-0.9.0}/psyke/schema/__init__.py +0 -0
  40. {psyke-0.8.14.dev6 → psyke-0.9.0}/psyke/tuning/__init__.py +0 -0
  41. {psyke-0.8.14.dev6 → psyke-0.9.0}/psyke/tuning/crash/__init__.py +0 -0
  42. {psyke-0.8.14.dev6 → psyke-0.9.0}/psyke/tuning/orchid/__init__.py +0 -0
  43. {psyke-0.8.14.dev6 → psyke-0.9.0}/psyke/utils/__init__.py +0 -0
  44. {psyke-0.8.14.dev6 → psyke-0.9.0}/psyke/utils/dataframe.py +0 -0
  45. {psyke-0.8.14.dev6 → psyke-0.9.0}/psyke/utils/metrics.py +0 -0
  46. {psyke-0.8.14.dev6 → psyke-0.9.0}/psyke/utils/plot.py +0 -0
  47. {psyke-0.8.14.dev6 → psyke-0.9.0}/psyke/utils/sorted.py +0 -0
  48. {psyke-0.8.14.dev6 → psyke-0.9.0}/psyke.egg-info/dependency_links.txt +0 -0
  49. {psyke-0.8.14.dev6 → psyke-0.9.0}/psyke.egg-info/not-zip-safe +0 -0
  50. {psyke-0.8.14.dev6 → psyke-0.9.0}/psyke.egg-info/requires.txt +0 -0
  51. {psyke-0.8.14.dev6 → psyke-0.9.0}/psyke.egg-info/top_level.txt +0 -0
  52. {psyke-0.8.14.dev6 → psyke-0.9.0}/pyproject.toml +0 -0
  53. {psyke-0.8.14.dev6 → psyke-0.9.0}/setup.cfg +0 -0
  54. {psyke-0.8.14.dev6 → psyke-0.9.0}/setup.py +0 -0
  55. {psyke-0.8.14.dev6 → psyke-0.9.0}/test/psyke/__init__.py +0 -0
  56. {psyke-0.8.14.dev6 → psyke-0.9.0}/test/psyke/clustering/__init__.py +0 -0
  57. {psyke-0.8.14.dev6 → psyke-0.9.0}/test/psyke/extraction/__init__.py +0 -0
  58. {psyke-0.8.14.dev6 → psyke-0.9.0}/test/psyke/extraction/cart/__init__.py +0 -0
  59. {psyke-0.8.14.dev6 → psyke-0.9.0}/test/psyke/extraction/cart/test_cart.py +0 -0
  60. {psyke-0.8.14.dev6 → psyke-0.9.0}/test/psyke/extraction/cart/test_simplified_cart.py +0 -0
  61. {psyke-0.8.14.dev6 → psyke-0.9.0}/test/psyke/extraction/hypercubic/__init__.py +0 -0
  62. {psyke-0.8.14.dev6 → psyke-0.9.0}/test/psyke/extraction/hypercubic/gridex/__init__.py +0 -0
  63. {psyke-0.8.14.dev6 → psyke-0.9.0}/test/psyke/extraction/hypercubic/gridex/test_gridex.py +0 -0
  64. {psyke-0.8.14.dev6 → psyke-0.9.0}/test/psyke/extraction/hypercubic/iter/__init__.py +0 -0
  65. {psyke-0.8.14.dev6 → psyke-0.9.0}/test/psyke/extraction/hypercubic/iter/test_iter.py +0 -0
  66. {psyke-0.8.14.dev6 → psyke-0.9.0}/test/psyke/extraction/hypercubic/test_hypercube.py +0 -0
  67. {psyke-0.8.14.dev6 → psyke-0.9.0}/test/psyke/extraction/real/__init__.py +0 -0
  68. {psyke-0.8.14.dev6 → psyke-0.9.0}/test/psyke/extraction/real/test_real.py +0 -0
  69. {psyke-0.8.14.dev6 → psyke-0.9.0}/test/psyke/extraction/real/test_rule.py +0 -0
  70. {psyke-0.8.14.dev6 → psyke-0.9.0}/test/psyke/extraction/trepan/__init__.py +0 -0
  71. {psyke-0.8.14.dev6 → psyke-0.9.0}/test/psyke/extraction/trepan/test_node.py +0 -0
  72. {psyke-0.8.14.dev6 → psyke-0.9.0}/test/psyke/extraction/trepan/test_split.py +0 -0
  73. {psyke-0.8.14.dev6 → psyke-0.9.0}/test/psyke/extraction/trepan/test_trepan.py +0 -0
  74. {psyke-0.8.14.dev6 → psyke-0.9.0}/test/psyke/utils/__init__.py +0 -0
  75. {psyke-0.8.14.dev6 → psyke-0.9.0}/test/psyke/utils/test_prune.py +0 -0
  76. {psyke-0.8.14.dev6 → psyke-0.9.0}/test/psyke/utils/test_simplify.py +0 -0
  77. {psyke-0.8.14.dev6 → psyke-0.9.0}/test/psyke/utils/test_simplify_formatter.py +0 -0
  78. {psyke-0.8.14.dev6 → psyke-0.9.0}/test/resources/__init__.py +0 -0
  79. {psyke-0.8.14.dev6 → psyke-0.9.0}/test/resources/datasets/__init__.py +0 -0
  80. {psyke-0.8.14.dev6 → psyke-0.9.0}/test/resources/predictors/__init__.py +0 -0
  81. {psyke-0.8.14.dev6 → psyke-0.9.0}/test/resources/tests/__init__.py +0 -0
{psyke-0.8.14.dev6/psyke.egg-info → psyke-0.9.0}/PKG-INFO RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: psyke
-Version: 0.8.14.dev6
+Version: 0.9.0
 Summary: Python-based implementation of PSyKE, i.e. a Platform for Symbolic Knowledge Extraction
 Home-page: https://github.com/psykei/psyke-python
 Author: Matteo Magnini

psyke-0.9.0/VERSION ADDED
@@ -0,0 +1 @@
+0.9.0

{psyke-0.8.14.dev6 → psyke-0.9.0}/psyke/__init__.py CHANGED
@@ -5,16 +5,20 @@ from enum import Enum
 
 import numpy as np
 import pandas as pd
+from matplotlib import pyplot as plt
 from sklearn.linear_model import LinearRegression
 from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, f1_score, accuracy_score, \
     adjusted_rand_score, adjusted_mutual_info_score, v_measure_score, fowlkes_mallows_score
+from tuprolog.solve.prolog import prolog_solver
 
 from psyke.schema import DiscreteFeature
 from psyke.utils import get_default_random_seed, Target, get_int_precision
-from tuprolog.theory import Theory
+from tuprolog.theory import Theory, mutable_theory
 from typing import Iterable
 import logging
 
+from psyke.utils.logic import get_in_rule, data_to_struct, get_not_in_rule
+
 logging.basicConfig(level=logging.WARN)
 logger = logging.getLogger('psyke')
 
@@ -52,7 +56,7 @@ class EvaluableModel(object):
         """
         Predicts the output values of every sample in dataset.
 
-        :param dataframe: is the set of instances to predict.
+        :param dataframe: the set of instances to predict.
         :return: a list of predictions.
         """
         return self.__convert(self._predict(dataframe))
@@ -85,7 +89,7 @@ class EvaluableModel(object):
     def score(self, dataframe: pd.DataFrame, predictor=None, fidelity: bool = False, completeness: bool = True,
               brute: bool = False, criterion: str = 'corners', n: int = 2,
              task: EvaluableModel.Task = Task.CLASSIFICATION,
-              scoring_function: Iterable[EvaluableModel.Score] = [ClassificationScore.ACCURACY]):
+              scoring_function: Iterable[EvaluableModel.Score] = (ClassificationScore.ACCURACY, )):
         extracted = np.array(
             self.predict(dataframe.iloc[:, :-1]) if not brute else
             self.brute_predict(dataframe.iloc[:, :-1], criterion, n)
@@ -151,42 +155,113 @@ class Extractor(EvaluableModel, ABC):
     def __init__(self, predictor, discretization: Iterable[DiscreteFeature] = None, normalization=None):
         super().__init__(discretization, normalization)
         self.predictor = predictor
+        self.theory = None
 
     def extract(self, dataframe: pd.DataFrame) -> Theory:
         """
         Extracts rules from the underlying predictor.
 
-        :param dataframe: is the set of instances to be used for the extraction.
+        :param dataframe: the set of instances to be used for the extraction.
         :return: the theory created from the extracted rules.
         """
         raise NotImplementedError('extract')
 
-    def predict_why(self, data: dict[str, float], verbose=True):
+    def predict_why(self, data: dict[str, float], verbose: bool = True):
         """
         Provides a prediction and the corresponding explanation.
-        :param data: is the instance to predict.
-        :param verbose: if the explanation has to be printed.
+        :param data: the instance to predict.
+        :param verbose: if True the explanation is printed.
         """
         raise NotImplementedError('predict_why')
 
-    def predict_counter(self, data: dict[str, float], verbose=True, only_first=True):
+    def predict_counter(self, data: dict[str, float], verbose: bool = True, only_first: bool = True):
         """
         Provides a prediction and counterfactual explanations.
-        :param data: is the instance to predict.
-        :param verbose: if the counterfactual explanation has to be printed.
-        :param only_first: if only the closest counterfactual explanation is provided for each distinct class.
+        :param data: the instance to predict.
+        :param verbose: if True the counterfactual explanation is printed.
+        :param only_first: if True only the closest counterfactual explanation is provided for each distinct class.
         """
         raise NotImplementedError('predict_counter')
 
+    def plot_fairness(self, dataframe: pd.DataFrame, groups: dict[str, list], colormap='seismic_r', filename=None,
+                      figsize=(5, 4)):
+        """
+        Provides a visual estimation of the fairness exhibited by an extractor with respect to the specified groups.
+        :param dataframe: the set of instances to be used for the estimation.
+        :param groups: the set of relevant groups to consider.
+        :param colormap: the colormap to use for the plot.
+        :param filename: if not None, name used to save the plot.
+        :param figsize: size of the plot.
+        """
+        counts = {group: len(dataframe[idx_g]) for group, idx_g in groups.items()}
+        output = {'labels': []}
+        for group in groups:
+            output[group] = []
+        for i, clause in enumerate(self.theory.clauses):
+            if len(dataframe) == 0:
+                break
+            solver = prolog_solver(static_kb=mutable_theory(clause).assertZ(get_in_rule()).assertZ(get_not_in_rule()))
+            idx = np.array([query.is_yes for query in
+                            [solver.solveOnce(data_to_struct(data)) for _, data in dataframe.iterrows()]])
+            # print(f'Rule {i + 1}. Outcome {clause.head.args[-1]}. Affecting', end='')
+            output['labels'].append(str(clause.head.args[-1]))
+            for group, idx_g in groups.items():
+                # print(f' {len(dataframe[idx & idx_g]) / counts[group]:.2f}%{group}', end='')
+                output[group].append(len(dataframe[idx & idx_g]) / counts[group])
+            dataframe = dataframe[~idx]
+            groups = {group: indices[~idx] for group, indices in groups.items()}
+            # print(f'. Left {len(dataframe)} instances')
+
+        binary = len(set(output['labels'])) == 2
+        labels = sorted(set(output['labels']))
+        data = np.vstack([output[group] for group in groups]).T * 100
+        if binary:
+            data[np.array(output['labels']) == labels[0]] *= -1
+
+        plt.figure(figsize=figsize)
+        plt.imshow(data, cmap=colormap, vmin=-100 if binary else 0, vmax=100)
+
+        plt.gca().set_xticks(range(len(groups)), labels=groups.keys())
+        plt.gca().set_yticks(range(len(output['labels'])),
+                             labels=[f'Rule {i + 1}\n{l}' for i, l in enumerate(output['labels'])])
+
+        plt.xlabel('Groups')
+        plt.ylabel('Rules')
+        plt.title("Rule set impact on groups")
+
+        for i in range(len(output['labels'])):
+            for j in range(len(groups)):
+                plt.gca().text(j, i, f'{abs(int(data[i, j]))}%', ha="center", va="center", color="k")
+
+        plt.gca().set_xticks([i + .5 for i in range(len(groups))], minor=True)
+        plt.gca().set_yticks([i + .5 for i in range(len(output['labels']))], minor=True)
+        plt.gca().grid(which='minor', color='k', linestyle='-', linewidth=.8)
+        plt.gca().tick_params(which='minor', bottom=False, left=False)
+        cbarticks = np.linspace(-100 if binary else 0, 100, 9 if binary else 11, dtype=int)
+        cbar = plt.colorbar(fraction=0.046, label='Affected samples (%)', ticks=cbarticks)
+        if binary:
+            ticklabels = [str(-i) if i < 0 else str(i) for i in cbarticks]
+            ticklabels[0] += f' {labels[0]}'
+            ticklabels[-1] += f' {labels[-1]}'
+            cbar.ax.set_yticklabels(ticklabels)
+
+        plt.tight_layout()
+        if filename is not None:
+            plt.savefig(filename, dpi=500)
+        plt.show()
+
+    def make_fair(self, features: Iterable[str]):
+        raise NotImplementedError(f'Fairness for {type(self).__name__} is not supported at the moment')
+
     def mae(self, dataframe: pd.DataFrame, predictor=None, brute: bool = False, criterion: str = 'center',
             n: int = 3) -> float:
         """
         Calculates the predictions' MAE w.r.t. the instances given as input.
 
-        :param dataframe: is the set of instances to be used to calculate the mean absolute error.
+        :param dataframe: the set of instances to be used to calculate the mean absolute error.
         :param predictor: if provided, its predictions on the dataframe are taken instead of the dataframe instances.
         :param brute: if True, a brute prediction is executed.
-        :param criterion: creterion for brute prediction.
+        :param criterion: criterion for brute prediction.
         :param n: number of points for brute prediction with 'perimeter' criterion.
         :return: the mean absolute error (MAE) of the predictions.
         """
@@ -198,10 +273,10 @@ class Extractor(EvaluableModel, ABC):
         """
         Calculates the predictions' MSE w.r.t. the instances given as input.
 
-        :param dataframe: is the set of instances to be used to calculate the mean squared error.
+        :param dataframe: the set of instances to be used to calculate the mean squared error.
         :param predictor: if provided, its predictions on the dataframe are taken instead of the dataframe instances.
         :param brute: if True, a brute prediction is executed.
-        :param criterion: creterion for brute prediction.
+        :param criterion: criterion for brute prediction.
         :param n: number of points for brute prediction with 'perimeter' criterion.
         :return: the mean squared error (MSE) of the predictions.
         """
@@ -213,10 +288,10 @@ class Extractor(EvaluableModel, ABC):
         """
         Calculates the predictions' R2 score w.r.t. the instances given as input.
 
-        :param dataframe: is the set of instances to be used to calculate the R2 score.
+        :param dataframe: the set of instances to be used to calculate the R2 score.
         :param predictor: if provided, its predictions on the dataframe are taken instead of the dataframe instances.
         :param brute: if True, a brute prediction is executed.
-        :param criterion: creterion for brute prediction.
+        :param criterion: criterion for brute prediction.
         :param n: number of points for brute prediction with 'perimeter' criterion.
         :return: the R2 score of the predictions.
         """
@@ -224,14 +299,14 @@ class Extractor(EvaluableModel, ABC):
             Extractor.Task.REGRESSION, [Extractor.RegressionScore.R2])[Extractor.RegressionScore.R2][-1]
 
     def accuracy(self, dataframe: pd.DataFrame, predictor=None, brute: bool = False, criterion: str = 'center',
-                n: int = 3) -> float:
+                 n: int = 3) -> float:
         """
         Calculates the predictions' accuracy classification score w.r.t. the instances given as input.
 
-        :param dataframe: is the set of instances to be used to calculate the accuracy classification score.
+        :param dataframe: the set of instances to be used to calculate the accuracy classification score.
         :param predictor: if provided, its predictions on the dataframe are taken instead of the dataframe instances.
         :param brute: if True, a brute prediction is executed.
-        :param criterion: creterion for brute prediction.
+        :param criterion: criterion for brute prediction.
         :param n: number of points for brute prediction with 'perimeter' criterion.
         :return: the accuracy classification score of the predictions.
         """
@@ -244,10 +319,10 @@ class Extractor(EvaluableModel, ABC):
         """
         Calculates the predictions' F1 score w.r.t. the instances given as input.
 
-        :param dataframe: is the set of instances to be used to calculate the F1 score.
+        :param dataframe: the set of instances to be used to calculate the F1 score.
         :param predictor: if provided, its predictions on the dataframe are taken instead of the dataframe instances.
         :param brute: if True, a brute prediction is executed.
-        :param criterion: creterion for brute prediction.
+        :param criterion: criterion for brute prediction.
         :param n: number of points for brute prediction with 'perimeter' criterion.
         :return: the F1 score of the predictions.
         """
@@ -331,7 +406,7 @@ class Extractor(EvaluableModel, ABC):
 
     @staticmethod
     def creepy(predictor, clustering, depth: int, error_threshold: float, output: Target = Target.CONSTANT,
-               gauss_components: int = 2, ranks: [(str, float)] = [], ignore_threshold: float = 0.0,
+               gauss_components: int = 2, ranks: Iterable[(str, float)] = tuple(), ignore_threshold: float = 0.0,
               discretization=None, normalization: dict[str, tuple[float, float]] = None,
               seed: int = get_default_random_seed()) -> Extractor:
         """
psyke-0.9.0/psyke/extraction/__init__.py ADDED
@@ -0,0 +1,51 @@
+from abc import ABC
+from collections import Iterable
+
+import pandas as pd
+from tuprolog.theory import Theory
+
+from psyke import Extractor
+
+
+class PedagogicalExtractor(Extractor, ABC):
+
+    def __init__(self, predictor, discretization=None, normalization=None):
+        Extractor.__init__(self, predictor=predictor, discretization=discretization, normalization=normalization)
+
+    def _substitute_output(self, dataframe: pd.DataFrame) -> pd.DataFrame:
+        new_y = pd.DataFrame(self.predictor.predict(dataframe.iloc[:, :-1])).set_index(dataframe.index)
+        data = dataframe.iloc[:, :-1].copy().join(new_y)
+        data.columns = dataframe.columns
+        return data
+
+    def extract(self, dataframe: pd.DataFrame) -> Theory:
+        self.theory = self._extract(self._substitute_output(dataframe))
+        return self.theory
+
+    def _extract(self, dataframe: pd.DataFrame) -> Theory:
+        raise NotImplementedError('extract')
+
+
+class FairExtractor(PedagogicalExtractor, ABC):
+
+    def __init__(self, extractor: Extractor, features: Iterable):
+        super().__init__(extractor.predictor, extractor.discretization, extractor.normalization)
+        self.features = features
+        self.extractor = extractor
+        # self.make_fair()
+
+    # def extract(self, dataframe: pd.DataFrame) -> Theory:
+    #     self.theory = self.extractor.extract(dataframe)
+    #     return self.theory
+
+    # def predict_why(self, data: dict[str, float], verbose: bool = True):
+    #     self.extractor.predict_why(data, verbose)
+
+    # def predict_counter(self, data: dict[str, float], verbose: bool = True, only_first: bool = True):
+    #     self.extractor.predict_counter(data, verbose, only_first)
+
+    # def _predict(self, dataframe: pd.DataFrame) -> Iterable:
+    #     return self.extractor.predict(dataframe)
+
+    # def _brute_predict(self, dataframe: pd.DataFrame, criterion: str = 'corner', n: int = 2) -> Iterable:
+    #     return self.extractor.brute_predict(dataframe, criterion, n)
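
PedagogicalExtractor now caches the extracted theory in self.theory and isolates the pedagogical re-labelling step in _substitute_output. The sketch below restates what that step does, with a hypothetical fitted clf and DataFrame df (both placeholders):

import pandas as pd
from sklearn.tree import DecisionTreeClassifier

# df is a placeholder DataFrame whose last column is the target.
clf = DecisionTreeClassifier().fit(df.iloc[:, :-1], df.iloc[:, -1])

# Pedagogical extraction induces rules against the black box, not the data:
# the true labels are replaced by the model's own predictions.
new_y = pd.DataFrame(clf.predict(df.iloc[:, :-1])).set_index(df.index)
relabelled = df.iloc[:, :-1].copy().join(new_y)
relabelled.columns = df.columns
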
psyke-0.8.14.dev6/psyke/extraction/cart/predictor.py → psyke-0.9.0/psyke/extraction/cart/CartPredictor.py RENAMED
@@ -1,11 +1,14 @@
 from collections import Iterable
 from typing import Union, Any
 import numpy as np
+import pandas as pd
 from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
-from psyke.schema import Value, LessThan, GreaterThan, SchemaException
+from tuprolog.core import clause, Var, Struct
+from tuprolog.theory import Theory, mutable_theory
 
-LeafConstraints = dict[str, list[Value]]
-LeafSequence = Iterable[tuple[LeafConstraints, Any]]
+from psyke.extraction.cart import LeafConstraints, LeafSequence
+from psyke.schema import LessThan, GreaterThan, SchemaException, DiscreteFeature
+from psyke.utils.logic import create_variable_list, create_head, create_term
 
 
 class CartPredictor:
@@ -14,8 +17,9 @@ class CartPredictor:
     """
 
     def __init__(self, predictor: Union[DecisionTreeClassifier, DecisionTreeRegressor] = DecisionTreeClassifier(),
-                 normalization=None):
+                 discretization=None, normalization=None):
         self._predictor = predictor
+        self.discretization = discretization
         self.normalization = normalization
 
     def __get_constraints(self, nodes: Iterable[(int, bool)]) -> LeafConstraints:
@@ -62,6 +66,47 @@ class CartPredictor:
     def predict(self, data) -> Iterable:
         return self._predictor.predict(data)
 
+    @staticmethod
+    def _simplify_nodes(nodes: list) -> Iterable:
+        simplified = [nodes.pop(0)]
+        while len(nodes) > 0:
+            first_node = nodes[0][0]
+            for k, conditions in first_node.items():
+                for condition in conditions:
+                    if all(k in node[0] and condition in node[0][k] for node in nodes):
+                        [node[0][k].remove(condition) for node in nodes]
+            simplified.append(nodes.pop(0))
+        return [({k: v for k, v in rule.items() if v != []}, prediction) for rule, prediction in simplified]
+
+    def _create_body(self, variables: dict[str, Var], conditions: LeafConstraints) -> Iterable[Struct]:
+        results = []
+        for feature_name, cond_list in conditions.items():
+            for condition in cond_list:
+                feature: DiscreteFeature = [d for d in self.discretization if feature_name in d.admissible_values][0] \
+                    if self.discretization else None
+                results.append(create_term(variables[feature_name], condition) if feature is None else
+                               create_term(variables[feature.name],
+                                           feature.admissible_values[feature_name],
+                                           isinstance(condition, GreaterThan)))
+        return results
+
+    def create_theory(self, data: pd.DataFrame, simplify: True) -> Theory:
+        new_theory = mutable_theory()
+        nodes = [node for node in self]
+        nodes = self._simplify_nodes(nodes) if simplify else nodes
+        for (constraints, prediction) in nodes:
+            if self.normalization is not None and data.columns[-1] in self.normalization:
+                m, s = self.normalization[data.columns[-1]]
+                prediction = prediction * s + m
+            variables = create_variable_list(self.discretization, data)
+            new_theory.assertZ(
+                clause(
+                    create_head(data.columns[-1], list(variables.values()), prediction),
+                    self._create_body(variables, constraints)
+                )
+            )
+        return new_theory
+
     @property
     def predictor(self) -> Union[DecisionTreeClassifier, DecisionTreeRegressor]:
         return self._predictor
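
_simplify_nodes, moved here from the old cart/__init__.py, exploits the fact that the emitted clauses fire in order: any constraint shared by every leaf still to be emitted is implied and can be dropped, and the final rule degenerates into a catch-all. A toy trace under that reading, with plain strings standing in for the schema's Value objects:

from psyke.extraction.cart.CartPredictor import CartPredictor

# Hypothetical leaves; all three share the 'age' constraint.
leaves = [({'age': ['<30'], 'sex': ['=1']}, 'deny'),
          ({'age': ['<30']}, 'grant'),
          ({'age': ['<30'], 'sex': ['=0']}, 'deny')]
print(CartPredictor._simplify_nodes(leaves))
# [({'age': ['<30'], 'sex': ['=1']}, 'deny'), ({}, 'grant'), ({}, 'deny')]
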
psyke-0.9.0/psyke/extraction/cart/FairTree.py ADDED
@@ -0,0 +1,196 @@
+import numpy as np
+from collections import Counter
+
+from sklearn.metrics import accuracy_score, r2_score
+
+
+class Node:
+    def __init__(self, feature=None, threshold=None, left=None, right=None, *, value=None):
+        self.feature = feature
+        self.threshold = threshold
+        self.left = left
+        self.right = right
+        self.value = value
+
+    def is_leaf_node(self):
+        return self.value is not None
+
+
+class FairTree:
+    def __init__(self, max_depth=3, max_leaves=None, criterion=None, min_samples_split=2, lambda_penalty=0.0,
+                 protected_attr=None):
+        self.max_depth = max_depth
+        self.max_leaves = max_leaves
+        self.min_samples_split = min_samples_split
+        self.lambda_penalty = lambda_penalty
+        self.protected_attr = protected_attr
+        self.criterion = criterion
+        self.root = None
+        self.n_leaves = 0
+        self.quality_function = None
+
+    def fit(self, X, y):
+        self.n_leaves = 0
+        self.root = self._grow_tree(X, y, depth=0)
+        while self.n_leaves > self.max_leaves:
+            self.prune_least_important_leaf(X, y)
+            self.n_leaves -= 1
+        return self
+
+    @staticmethod
+    def _estimate_output(y):
+        raise NotImplementedError
+
+    def score(self, X, y):
+        raise NotImplementedError
+
+    def predict(self, X):
+        return np.array([self._traverse_tree(x, self.root) for _, x in X.iterrows()])
+
+    def _traverse_tree(self, x, node):
+        if node.is_leaf_node():
+            return node.value
+        if x[node.feature] <= node.threshold:
+            return self._traverse_tree(x, node.left)
+        return self._traverse_tree(x, node.right)
+
+    def _grow_tree(self, X, y, depth):
+        if depth >= self.max_depth or X.shape[0] < self.min_samples_split or len(set(y.values.flatten())) == 1 or \
+                (self.max_leaves is not None and self.n_leaves >= self.max_leaves):
+            self.n_leaves += 1
+            return Node(value=self._estimate_output(y))
+
+        best_feature, best_threshold = self._best_split(X, y)
+        if best_feature is None:
+            self.n_leaves += 1
+            return Node(value=self._estimate_output(y))
+
+        left_idxs = X[best_feature] <= best_threshold
+        right_idxs = X[best_feature] > best_threshold
+
+        left = self._grow_tree(X[left_idxs], y[left_idxs], depth + 1)
+        right = self._grow_tree(X[right_idxs], y[right_idxs], depth + 1)
+        return Node(best_feature, best_threshold, left, right)
+
+    def _best_split(self, X, y):
+        best_gain = -float('inf')
+        split_idx, split_threshold = None, None
+
+        for feature in [feature for feature in X.columns if feature not in self.protected_attr]:
+            for threshold in np.unique(np.quantile(X[feature], np.linspace(0, 1, num=25))):
+                left_idxs = X[feature] <= threshold
+                right_idxs = X[feature] > threshold
+
+                if left_idxs.sum() == 0 or right_idxs.sum() == 0:
+                    continue
+
+                gain = self._fair_gain(y, left_idxs, right_idxs, X[self.protected_attr])
+
+                if gain > best_gain:
+                    best_gain = gain
+                    split_idx = feature
+                    split_threshold = threshold
+        return split_idx, split_threshold
+
+    @staticmethod
+    def _disparity(group):
+        counts = Counter(group)
+        if len(counts) <= 1:
+            return 0.0
+        values = np.array(list(counts.values())) / len(group)
+        return np.abs(values[0] - values[1])
+
+    def _fair_gain(self, y, left_idx, right_idx, protected):
+        child = len(y[left_idx]) / len(y) * self.quality_function(y[left_idx]) + \
+                len(y[right_idx]) / len(y) * self.quality_function(y[right_idx])
+        info_gain = self.quality_function(y) - child
+        penalty = self._disparity(protected[left_idx]) + self._disparity(protected[right_idx])
+        return info_gain - self.lambda_penalty * penalty
+
+    @staticmethod
+    def _match_path(x, path):
+        for node, left in path:
+            if left and x[node.feature] > node.threshold:
+                return False
+            if not left and x[node.feature] <= node.threshold:
+                return False
+        return True
+
+    @staticmethod
+    def candidates(node, parent=None, is_left=None, path=[]):
+        if node is None or node.is_leaf_node():
+            return []
+        leaves = []
+        if node.left.is_leaf_node() and node.right.is_leaf_node():
+            leaves.append((node, parent, is_left, path))
+        leaves += FairTreeClassifier.candidates(node.left, node, True, path + [(node, True)])
+        leaves += FairTreeClassifier.candidates(node.right, node, False, path + [(node, False)])
+        return leaves
+
+    def prune_least_important_leaf(self, X, y):
+        best_score = -np.inf
+        best_prune = None
+
+        for node, parent, is_left, path in self.candidates(self.root):
+            original_left = node.left
+            original_right = node.right
+
+            merged_y = y[(X.apply(lambda x: self._match_path(x, path), axis=1))]
+            if len(merged_y) == 0:
+                continue
+            new_value = self._estimate_output(merged_y)
+            node.left = node.right = None
+            node.value = new_value
+
+            score = self.score(X, y)
+            if score >= best_score:
+                best_score = score
+                best_prune = (node, new_value)
+
+            node.left, node.right, node.value = original_left, original_right, None
+
+        if best_prune:
+            best_prune[0].left = best_prune[0].right = None
+            best_prune[0].value = best_prune[1]
+
+
+class FairTreeClassifier(FairTree):
+    def __init__(self, max_depth=3, max_leaves=None, criterion='entropy', min_samples_split=2, lambda_penalty=0.0,
+                 protected_attr=None):
+        super().__init__(max_depth, max_leaves, criterion, min_samples_split, lambda_penalty, protected_attr)
+        self.quality_function = self._gini if self.criterion == 'gini' else self._entropy
+
+    @staticmethod
+    def _estimate_output(y):
+        return Counter(y.values.flatten()).most_common(1)[0][0]
+
+    def score(self, X, y):
+        return accuracy_score(y.values.flatten(), self.predict(X))
+
+    @staticmethod
+    def _entropy(y):
+        ps = np.unique(y, return_counts=True)[1] / len(y)
+        return -np.sum([p * np.log2(p) for p in ps if p > 0])
+
+    @staticmethod
+    def _gini(y):
+        return 1.0 - np.sum(np.unique(y, return_counts=True)[1] / len(y)**2)
+
+
+class FairTreeRegressor(FairTree):
+    def __init__(self, max_depth=3, max_leaves=None, criterion='mse', min_samples_split=2, lambda_penalty=0.0,
+                 protected_attr=None):
+        super().__init__(max_depth, max_leaves, criterion, min_samples_split, lambda_penalty, protected_attr)
+        self.quality_function = self._mse
+
+    @staticmethod
+    def _estimate_output(y):
+        return np.mean(y.values.flatten())
+
+    def score(self, X, y):
+        return r2_score(y.values.flatten(), self.predict(X))
+
+    @staticmethod
+    def _mse(y):
+        y = y.values.flatten().astype(float)
+        return np.mean((y - np.mean(y))**2)
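
FairTree scores each candidate split by _fair_gain, i.e. the usual impurity-based information gain minus lambda_penalty times the disparity of the protected groups on each side, and it never splits on the protected attributes themselves. A minimal fitting sketch on toy data (the column names and values are illustrative only); note that fit expects pandas objects, since predict iterates rows and _estimate_output reads y.values:

import pandas as pd
from psyke.extraction.cart.FairTree import FairTreeClassifier

X = pd.DataFrame({'age': [22, 25, 33, 41, 52, 60],
                  'sex': [0, 1, 0, 1, 0, 1]})     # 'sex' is the protected attribute
y = pd.DataFrame({'grant': ['no', 'no', 'yes', 'yes', 'yes', 'yes']})

# lambda_penalty > 0 trades impurity reduction against the disparity a split
# induces between protected subgroups; 'sex' is excluded from candidate splits.
tree = FairTreeClassifier(max_depth=2, max_leaves=3, lambda_penalty=0.5,
                          protected_attr=['sex']).fit(X, y)
print(tree.predict(X))
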
psyke-0.9.0/psyke/extraction/cart/FairTreePredictor.py ADDED
@@ -0,0 +1,62 @@
+import copy
+from collections import Iterable
+from typing import Union, Any
+import pandas as pd
+from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
+from tuprolog.core import clause, Var, Struct
+from tuprolog.theory import Theory, mutable_theory
+
+from psyke.extraction.cart import FairTreeClassifier, FairTreeRegressor, LeafSequence, LeafConstraints
+from psyke.extraction.cart.CartPredictor import CartPredictor
+from psyke.schema import LessThan, GreaterThan, SchemaException, DiscreteFeature, Value
+from psyke.utils.logic import create_variable_list, create_head, create_term
+
+
+class FairTreePredictor(CartPredictor):
+    """
+    A wrapper for fair decision and regression trees of psyke.
+    """
+
+    def __init__(self, predictor: Union[FairTreeClassifier, FairTreeRegressor] = DecisionTreeClassifier(),
+                 discretization=None, normalization=None):
+        super().__init__(predictor, discretization, normalization)
+
+    def __iter__(self) -> LeafSequence:
+        leaves = [node for node in self.recurse(self._predictor.root, {})]
+        return (leaf for leaf in leaves)
+
+    @staticmethod
+    def merge_constraints(constraints: LeafConstraints, constraint: Value, feature: str):
+        if feature in constraints:
+            try:
+                constraints[feature][-1] *= constraint
+            except SchemaException:
+                constraints[feature].append(constraint)
+        else:
+            constraints[feature] = [constraint]
+        return constraints
+
+    def recurse(self, node, constraints) -> Union[LeafSequence, tuple[LeafConstraints, Any]]:
+        if node.is_leaf_node():
+            return constraints, node.value
+
+        feature = node.feature
+        threshold = node.threshold if self.normalization is None else \
+            (node.threshold * self.normalization[feature][1] + self.normalization[feature][0])
+
+        left = self.recurse(node.left, self.merge_constraints(copy.deepcopy(constraints), LessThan(threshold), feature))
+        right = self.recurse(node.right, self.merge_constraints(copy.deepcopy(constraints),
+                                                                GreaterThan(threshold), feature))
+        return (left if isinstance(left, list) else [left]) + (right if isinstance(right, list) else [right])
+
+    @property
+    def predictor(self) -> Union[FairTreeClassifier, FairTreeRegressor]:
+        return self._predictor
+
+    @property
+    def n_leaves(self) -> int:
+        return self._predictor.n_leaves
+
+    @predictor.setter
+    def predictor(self, predictor: Union[FairTreeClassifier, FairTreeRegressor]):
+        self._predictor = predictor
psyke-0.9.0/psyke/extraction/cart/__init__.py ADDED
@@ -0,0 +1,71 @@
+from abc import ABC
+
+from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
+
+from psyke.extraction import PedagogicalExtractor
+from psyke import get_default_random_seed
+from psyke.extraction.cart.FairTree import FairTreeClassifier, FairTreeRegressor
+from psyke.schema import DiscreteFeature, Value
+from tuprolog.theory import Theory
+from typing import Iterable, Any
+import pandas as pd
+
+
+TREE_SEED = get_default_random_seed()
+
+LeafConstraints = dict[str, list[Value]]
+LeafSequence = Iterable[tuple[LeafConstraints, Any]]
+
+
+class Cart(PedagogicalExtractor, ABC):
+
+    def __init__(self, predictor, max_depth: int = 3, max_leaves: int = None, max_features=None,
+                 discretization: Iterable[DiscreteFeature] = None,
+                 normalization=None, simplify: bool = True):
+        from psyke.extraction.cart.CartPredictor import CartPredictor
+
+        super().__init__(predictor, discretization, normalization)
+        self.is_fair = None
+        self._cart_predictor = CartPredictor(discretization=discretization, normalization=normalization)
+        self.depth = max_depth
+        self.leaves = max_leaves
+        self.max_features = max_features
+        self._simplify = simplify
+
+    def _extract(self, data: pd.DataFrame) -> Theory:
+        from psyke.extraction.cart.FairTreePredictor import FairTreePredictor
+
+        if self.is_fair:
+            self._cart_predictor = FairTreePredictor(discretization=self.discretization,
+                                                     normalization=self.normalization)
+            fair_tree = FairTreeClassifier if isinstance(data.iloc[0, -1], str) else FairTreeRegressor
+            self._cart_predictor.predictor = fair_tree(max_depth=self.depth, max_leaves=self.leaves,
+                                                       protected_attr=self.is_fair)
+        else:
+            tree = DecisionTreeClassifier if isinstance(data.iloc[0, -1], str) else DecisionTreeRegressor
+            self._cart_predictor.predictor = tree(random_state=TREE_SEED, max_depth=self.depth,
+                                                  max_leaf_nodes=self.leaves, max_features=self.max_features)
+        self._cart_predictor.predictor.fit(data.iloc[:, :-1], data.iloc[:, -1])
+        return self._cart_predictor.create_theory(data, self._simplify)
+
+    def make_fair(self, features: Iterable[str]):
+        self.is_fair = features
+
+    def _predict(self, dataframe: pd.DataFrame) -> Iterable:
+        return self._cart_predictor.predict(dataframe)
+
+    def predict_why(self, data: dict[str, float], verbose=True):
+        prediction = None
+        conditions = {}
+        if self.normalization is not None:
+            data = {k: v * self.normalization[k][1] + self.normalization[k][0] if k in self.normalization else v
+                    for k, v in data.items()}
+        for conditions, prediction in self._cart_predictor:
+            if all(all(interval.is_in(data[variable]) for interval in intervals)
+                   for variable, intervals in conditions.items()):
+                break
+        return prediction, conditions
+
+    @property
+    def n_rules(self) -> int:
+        return self._cart_predictor.n_leaves
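
Cart now owns the fairness switch: make_fair stores the protected features in is_fair, and the next _extract swaps the sklearn tree for a FairTree accessed through FairTreePredictor. A sketch of the round trip under the same assumptions as before (black_box and train are placeholders, 'sex' and 'age' illustrative column names):

from sklearn.tree import DecisionTreeClassifier
from psyke.extraction.cart import Cart

black_box = DecisionTreeClassifier().fit(train.iloc[:, :-1], train.iloc[:, -1])
cart = Cart(black_box, max_depth=3, max_leaves=5)
theory = cart.extract(train)              # plain CART: one clause per leaf
print(cart.n_rules)

cart.make_fair(['sex'])                   # route the next extraction through FairTree
fair_theory = cart.extract(train)
print(cart.predict_why({'age': 37.0, 'sex': 0.0}))   # (prediction, matching conditions)
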
{psyke-0.8.14.dev6 → psyke-0.9.0}/psyke/extraction/hypercubic/__init__.py CHANGED
@@ -1,7 +1,8 @@
 from __future__ import annotations
 
-import math
 from abc import ABC
+from collections import Iterable
+
 import numpy as np
 import pandas as pd
 from sklearn.base import ClassifierMixin
@@ -13,7 +14,7 @@ from psyke.extraction import PedagogicalExtractor
 from psyke.extraction.hypercubic.hypercube import HyperCube, RegressionCube, ClassificationCube, ClosedCube, Point, \
     GenericCube
 from psyke.hypercubepredictor import HyperCubePredictor
-from psyke.schema import Between, Outside, Value
+from psyke.schema import Value
 from psyke.utils.logic import create_variable_list, create_head, to_var, Simplifier
 from psyke.utils import Target
 from psyke.extraction.hypercubic.strategy import Strategy, FixedStrategy
@@ -209,10 +210,16 @@ class FeatureRanker:
 
 
 class Grid:
-    def __init__(self, iterations: int = 1, strategy: Strategy | list[Strategy] = FixedStrategy()):
+    def __init__(self, iterations: int = 1, strategy: Strategy | Iterable[Strategy] = FixedStrategy()):
         self.iterations = iterations
         self.strategy = strategy
 
+    def make_fair(self, features: Iterable[str]):
+        if isinstance(self.strategy, Strategy):
+            self.strategy.make_fair(features)
+        elif isinstance(self.strategy, Iterable):
+            [strategy.make_fair(features) for strategy in self.strategy]
+
     def get(self, feature: str, depth: int) -> int:
         if isinstance(self.strategy, list):
             return self.strategy[depth].get(feature)
{psyke-0.8.14.dev6 → psyke-0.9.0}/psyke/extraction/hypercubic/creepy/__init__.py CHANGED
@@ -17,7 +17,7 @@ class CReEPy(HyperCubeExtractor):
     """
 
    def __init__(self, predictor, clustering=Clustering.exact, depth: int = 3, error_threshold: float = 0.1,
-                 output: Target = Target.CONSTANT, gauss_components: int = 5, ranks: list[(str, float)] = [],
+                 output: Target = Target.CONSTANT, gauss_components: int = 5, ranks: Iterable[(str, float)] = tuple(),
                  ignore_threshold: float = 0.0, discretization=None, normalization=None,
                  seed: int = get_default_random_seed()):
         super().__init__(predictor, Target.CLASSIFICATION if isinstance(predictor, ClassifierMixin) else output,
{psyke-0.8.14.dev6 → psyke-0.9.0}/psyke/extraction/hypercubic/gridex/__init__.py CHANGED
@@ -117,3 +117,6 @@ class GridEx(HyperCubeExtractor):
             to_split = [cube for cube in to_split if cube not in best[0]] + [best[1]]
             not_in_cache = [best[1]]
         return to_split
+
+    def make_fair(self, features: Iterable[str]):
+        self.grid.make_fair(features)
{psyke-0.8.14.dev6 → psyke-0.9.0}/psyke/extraction/hypercubic/iter/__init__.py CHANGED
@@ -23,6 +23,7 @@ class ITER(HyperCubeExtractor):
             raise NotImplementedError
         self.predictor = predictor
         self.min_update = min_update
+        self._init_points = n_points
         self.n_points = n_points
         self.max_iterations = max_iterations
         self.min_examples = min_examples
@@ -33,6 +34,10 @@ class ITER(HyperCubeExtractor):
         self.seed = seed
         self.ignore_dimensions = ignore_dimensions if ignore_dimensions is not None else []
 
+    def make_fair(self, features: Iterable[str]):
+        self.n_points = self._init_points
+        self.ignore_dimensions += list(features)
+
     def _best_cube(self, dataframe: pd.DataFrame, cube: GenericCube, cubes: Iterable[Expansion]) -> Expansion | None:
         expansions = []
         for limit in cubes:
{psyke-0.8.14.dev6 → psyke-0.9.0}/psyke/extraction/hypercubic/strategy.py CHANGED
@@ -1,16 +1,20 @@
 from __future__ import annotations
 
 from functools import reduce
-from typing import Iterable
+from collections import Iterable
 
 
 class Strategy:
-    def __init__(self):
-        self._partitions = None
+    def __init__(self, partitions=None):
+        self._partitions = partitions
+        self._no_features = []
 
     def get(self, feature: str) -> int:
         raise NotImplementedError
 
+    def make_fair(self, features: Iterable[str]):
+        self._no_features = features
+
     def partition_number(self, features: Iterable[str]) -> int:
         return reduce(lambda x, y: x * y, map(self.get, features), 1)
 
@@ -29,23 +33,23 @@ class Strategy:
 
 class FixedStrategy(Strategy):
     def __init__(self, partitions: int = 2):
-        super().__init__()
-        self._partitions = partitions
+        super().__init__(partitions)
 
     def get(self, feature: str) -> int:
-        return self._partitions
+        return 1 if feature in self._no_features else self._partitions
 
     def __str__(self):
         return "Fixed ({})".format(super().__str__())
 
 
 class AdaptiveStrategy(Strategy):
-    def __init__(self, features: Iterable[str], partitions: Iterable[tuple[float, float]] | None = None):
-        super().__init__()
+    def __init__(self, features: Iterable[(str, float)], partitions: Iterable[tuple[float, float]] | None = None):
+        super().__init__(partitions if partitions is not None else [(0.33, 2), (0.67, 3)])
         self.features = features
-        self._partitions = partitions if partitions is not None else [(0.33, 2), (0.67, 3)]
 
     def get(self, feature: str) -> int:
+        if feature in self._no_features:
+            return 1
         importance = next(filter(lambda t: t[0] == feature, self.features))[1]
         n = 1
         for (imp, part) in self._partitions:
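
On the hypercubic side, fairness means never partitioning the protected dimensions: make_fair records them in _no_features and get returns a single partition for them, so no extracted rule can split on those features. A quick check of that behavior (the feature names are illustrative):

from psyke.extraction.hypercubic.strategy import FixedStrategy

strategy = FixedStrategy(partitions=3)
strategy.make_fair(['sex'])
print(strategy.get('age'))                        # 3: partitioned as usual
print(strategy.get('sex'))                        # 1: the protected axis is never cut
print(strategy.partition_number(['age', 'sex']))  # 3 instead of 9
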
{psyke-0.8.14.dev6 → psyke-0.9.0}/psyke/tuning/pedro/__init__.py CHANGED
@@ -55,8 +55,10 @@ class PEDRO(SKEOptimizer, IterativeOptimizer):
         patience = self.patience
         while patience > 0:
             print("{}. {}. Threshold = {:.2f}. ".format(self.algorithm_name, grid, threshold), end="")
-            extractor = self.algorithm(self.predictor, grid, min_examples=25, output=self.output,
-                                       threshold=threshold, normalization=self.normalization)
+            param_dict = dict(min_examples=25, threshold=threshold, normalization=self.normalization)
+            if self.algorithm != Extractor.gridrex:
+                param_dict['output'] = self.output
+            extractor = self.algorithm(self.predictor, grid, **param_dict)
             _ = extractor.extract(self.dataframe)
             error_function = (lambda *x: 1 - extractor.accuracy(*x)) if self.output == Target.CLASSIFICATION \
                 else extractor.mae
{psyke-0.8.14.dev6 → psyke-0.9.0}/psyke/utils/logic.py CHANGED
@@ -126,7 +126,7 @@ def to_var(name: str) -> Var:
 def create_variable_list(features: list[DiscreteFeature], dataset: pd.DataFrame = None) -> dict[str, Var]:
     dataset = dataset.columns[:-1] if dataset is not None else None
     values = {feature.name: to_var(feature.name) for feature in features} \
-        if len(features) > 0 else {name: to_var(name) for name in dataset}
+        if features else {name: to_var(name) for name in dataset}
     return values
 
 
{psyke-0.8.14.dev6 → psyke-0.9.0/psyke.egg-info}/PKG-INFO RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: psyke
-Version: 0.8.14.dev6
+Version: 0.9.0
 Summary: Python-based implementation of PSyKE, i.e. a Platform for Symbolic Knowledge Extraction
 Home-page: https://github.com/psykei/psyke-python
 Author: Matteo Magnini
{psyke-0.8.14.dev6 → psyke-0.9.0}/psyke.egg-info/SOURCES.txt CHANGED
@@ -17,8 +17,10 @@ psyke/clustering/utils.py
 psyke/clustering/cream/__init__.py
 psyke/clustering/exact/__init__.py
 psyke/extraction/__init__.py
+psyke/extraction/cart/CartPredictor.py
+psyke/extraction/cart/FairTree.py
+psyke/extraction/cart/FairTreePredictor.py
 psyke/extraction/cart/__init__.py
-psyke/extraction/cart/predictor.py
 psyke/extraction/hypercubic/__init__.py
 psyke/extraction/hypercubic/hypercube.py
 psyke/extraction/hypercubic/strategy.py

psyke-0.8.14.dev6/VERSION DELETED
@@ -1 +0,0 @@
-0.8.14.dev6

psyke-0.8.14.dev6/psyke/extraction/__init__.py DELETED
@@ -1,21 +0,0 @@
-from abc import ABC
-
-import pandas as pd
-from tuprolog.theory import Theory
-
-from psyke import Extractor
-
-
-class PedagogicalExtractor(Extractor, ABC):
-
-    def __init__(self, predictor, discretization=None, normalization=None):
-        Extractor.__init__(self, predictor=predictor, discretization=discretization, normalization=normalization)
-
-    def extract(self, dataframe: pd.DataFrame) -> Theory:
-        new_y = pd.DataFrame(self.predictor.predict(dataframe.iloc[:, :-1])).set_index(dataframe.index)
-        data = dataframe.iloc[:, :-1].copy().join(new_y)
-        data.columns = dataframe.columns
-        return self._extract(data)
-
-    def _extract(self, dataframe: pd.DataFrame) -> Theory:
-        raise NotImplementedError('extract')
psyke-0.8.14.dev6/psyke/extraction/cart/__init__.py DELETED
@@ -1,96 +0,0 @@
-from abc import ABC
-
-from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
-
-from psyke.extraction import PedagogicalExtractor
-from psyke.extraction.cart.predictor import CartPredictor, LeafConstraints, LeafSequence
-from psyke import get_default_random_seed
-from psyke.schema import GreaterThan, DiscreteFeature
-from psyke.utils.logic import create_variable_list, create_head, create_term
-from tuprolog.core import clause, Var, Struct
-from tuprolog.theory import Theory, mutable_theory
-from typing import Iterable
-import pandas as pd
-
-
-TREE_SEED = get_default_random_seed()
-
-
-class Cart(PedagogicalExtractor, ABC):
-
-    def __init__(self, predictor, max_depth: int = 3, max_leaves: int = None, max_features=None,
-                 discretization: Iterable[DiscreteFeature] = None,
-                 normalization=None, simplify: bool = True):
-        super().__init__(predictor, discretization, normalization)
-        self._cart_predictor = CartPredictor(normalization=normalization)
-        self.depth = max_depth
-        self.leaves = max_leaves
-        self.max_features = max_features
-        self._simplify = simplify
-
-    def _create_body(self, variables: dict[str, Var], conditions: LeafConstraints) -> Iterable[Struct]:
-        results = []
-        for feature_name, cond_list in conditions.items():
-            for condition in cond_list:
-                features = [d for d in self.discretization if feature_name in d.admissible_values]
-                feature: DiscreteFeature = features[0] if len(features) > 0 else None
-                results.append(create_term(variables[feature_name], condition) if feature is None else
-                               create_term(variables[feature.name],
-                                           feature.admissible_values[feature_name],
-                                           isinstance(condition, GreaterThan)))
-        return results
-
-    @staticmethod
-    def _simplify_nodes(nodes: list) -> Iterable:
-        simplified = [nodes.pop(0)]
-        while len(nodes) > 0:
-            first_node = nodes[0][0]
-            for k, conditions in first_node.items():
-                for condition in conditions:
-                    if all(k in node[0] and condition in node[0][k] for node in nodes):
-                        [node[0][k].remove(condition) for node in nodes]
-            simplified.append(nodes.pop(0))
-        return [({k: v for k, v in rule.items() if v != []}, prediction) for rule, prediction in simplified]
-
-    def _create_theory(self, data: pd.DataFrame) -> Theory:
-        new_theory = mutable_theory()
-        nodes = [node for node in self._cart_predictor]
-        nodes = Cart._simplify_nodes(nodes) if self._simplify else nodes
-        for (constraints, prediction) in nodes:
-            if self.normalization is not None and data.columns[-1] in self.normalization:
-                m, s = self.normalization[data.columns[-1]]
-                prediction = prediction * s + m
-            variables = create_variable_list(self.discretization, data)
-            new_theory.assertZ(
-                clause(
-                    create_head(data.columns[-1], list(variables.values()), prediction),
-                    self._create_body(variables, constraints)
-                )
-            )
-        return new_theory
-
-    def _extract(self, data: pd.DataFrame) -> Theory:
-        tree = DecisionTreeClassifier if isinstance(data.iloc[0, -1], str) else DecisionTreeRegressor
-        self._cart_predictor.predictor = tree(random_state=TREE_SEED, max_depth=self.depth,
-                                              max_leaf_nodes=self.leaves, max_features=self.max_features)
-        self._cart_predictor.predictor.fit(data.iloc[:, :-1], data.iloc[:, -1])
-        return self._create_theory(data)
-
-    def _predict(self, dataframe: pd.DataFrame) -> Iterable:
-        return self._cart_predictor.predict(dataframe)
-
-    def predict_why(self, data: dict[str, float], verbose=True):
-        prediction = None
-        conditions = {}
-        if self.normalization is not None:
-            data = {k: v * self.normalization[k][1] + self.normalization[k][0] if k in self.normalization else v
-                    for k, v in data.items()}
-        for conditions, prediction in self._cart_predictor:
-            if all(all(interval.is_in(data[variable]) for interval in intervals)
-                   for variable, intervals in conditions.items()):
-                break
-        return prediction, conditions
-
-    @property
-    def n_rules(self) -> int:
-        return self._cart_predictor.n_leaves