humancompatible-detect 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. humancompatible/__init__.py +0 -0
  2. humancompatible/detect/__init__.py +13 -0
  3. humancompatible/detect/binarizer/Binarizer.py +265 -0
  4. humancompatible/detect/binarizer/__init__.py +3 -0
  5. humancompatible/detect/data_handler/DataHandler.py +351 -0
  6. humancompatible/detect/data_handler/__init__.py +3 -0
  7. humancompatible/detect/data_handler/features/Binary.py +107 -0
  8. humancompatible/detect/data_handler/features/Categorical.py +157 -0
  9. humancompatible/detect/data_handler/features/Contiguous.py +107 -0
  10. humancompatible/detect/data_handler/features/Feature.py +105 -0
  11. humancompatible/detect/data_handler/features/Mixed.py +147 -0
  12. humancompatible/detect/data_handler/features/__init__.py +6 -0
  13. humancompatible/detect/data_handler/features/utils.py +88 -0
  14. humancompatible/detect/data_handler/types.py +15 -0
  15. humancompatible/detect/detect_bias.py +280 -0
  16. humancompatible/detect/evaluate_bias.py +286 -0
  17. humancompatible/detect/helpers/__init__.py +5 -0
  18. humancompatible/detect/helpers/prepare.py +105 -0
  19. humancompatible/detect/helpers/utils.py +364 -0
  20. humancompatible/detect/methods/__init__.py +0 -0
  21. humancompatible/detect/methods/l_inf/__init__.py +3 -0
  22. humancompatible/detect/methods/l_inf/l_inf.py +109 -0
  23. humancompatible/detect/methods/l_inf/lp_tools.py +53 -0
  24. humancompatible/detect/methods/msd/__init__.py +4 -0
  25. humancompatible/detect/methods/msd/mapping_msd.py +112 -0
  26. humancompatible/detect/methods/msd/metrics_msd.py +34 -0
  27. humancompatible/detect/methods/msd/msd.py +93 -0
  28. humancompatible/detect/methods/msd/one_rule.py +324 -0
  29. humancompatible_detect-0.1.4.dist-info/METADATA +253 -0
  30. humancompatible_detect-0.1.4.dist-info/RECORD +33 -0
  31. humancompatible_detect-0.1.4.dist-info/WHEEL +5 -0
  32. humancompatible_detect-0.1.4.dist-info/licenses/LICENSE +201 -0
  33. humancompatible_detect-0.1.4.dist-info/top_level.txt +1 -0
File without changes
@@ -0,0 +1,13 @@
"""Public API of the ``humancompatible.detect`` package.

Re-exports the bias-detection and bias-evaluation entry points so that
callers can import them directly from ``humancompatible.detect``.
"""

from .detect_bias import most_biased_subgroup, most_biased_subgroup_csv, most_biased_subgroup_two_samples
from .evaluate_bias import evaluate_biased_subgroup, evaluate_biased_subgroup_csv, evaluate_biased_subgroup_two_samples
from .helpers.utils import detect_and_score

# Explicit public API, consumed by ``from humancompatible.detect import *``.
__all__ = [
    "detect_and_score",
    "most_biased_subgroup",
    "most_biased_subgroup_csv",
    "most_biased_subgroup_two_samples",
    "evaluate_biased_subgroup",
    "evaluate_biased_subgroup_csv",
    "evaluate_biased_subgroup_two_samples",
]
@@ -0,0 +1,265 @@
1
+ from __future__ import annotations
2
+
3
+ from enum import Enum
4
+
5
+ import numpy as np
6
+ import pandas as pd
7
+
8
+ from ..data_handler import DataHandler
9
+ from ..data_handler.features import Binary, Categorical, Contiguous, Feature, Mixed
10
+ from ..data_handler.types import CategValue, DataLike, OneDimData
11
+
12
+ BinValue = float | tuple[float, float] | CategValue | list[CategValue] | bool
13
+
14
+
15
+ class Operation(Enum):
16
+ EQ = "="
17
+ NE = "!="
18
+ LE = "<="
19
+ LT = "<"
20
+ GE = ">="
21
+ GT = ">"
22
+ IN = "in"
23
+ NOT_IN = "not in"
24
+ BETWEEN = "between"
25
+ OUTSIDE = "outside"
26
+
27
+ # TODO move the perform to the Bin class
28
+ @classmethod
29
+ def perform(
30
+ cls, op: Operation, vals: np.ndarray[int | float | str], reference: BinValue
31
+ ) -> np.ndarray[bool]:
32
+ if op == Operation.EQ:
33
+ return vals == reference
34
+ elif op == Operation.NE:
35
+ return vals != reference
36
+ elif op == Operation.LE:
37
+ return vals <= reference
38
+ elif op == Operation.LT:
39
+ return vals < reference
40
+ elif op == Operation.GE:
41
+ return vals >= reference
42
+ elif op == Operation.GT:
43
+ return vals > reference
44
+ elif op == Operation.IN:
45
+ result = np.zeros_like(vals, dtype=bool)
46
+ for r in reference:
47
+ result |= vals == r
48
+ return result
49
+ elif op == Operation.NOT_IN:
50
+ return ~Operation.perform(Operation.IN, vals, reference)
51
+ elif op == Operation.BETWEEN:
52
+ return (vals >= reference[0]) & (vals < reference[1])
53
+ elif op == Operation.OUTSIDE:
54
+ return ~Operation.perform(Operation.BETWEEN, vals, reference)
55
+ else:
56
+ raise NotImplementedError(f"Operation {op} is not implemented")
57
+
58
+ @classmethod
59
+ def negated(cls, op) -> Operation:
60
+ return {
61
+ Operation.EQ: Operation.NE,
62
+ Operation.NE: Operation.EQ,
63
+ Operation.LE: Operation.GT,
64
+ Operation.LT: Operation.GE,
65
+ Operation.GE: Operation.LT,
66
+ Operation.GT: Operation.LE,
67
+ Operation.IN: Operation.NOT_IN,
68
+ Operation.NOT_IN: Operation.IN,
69
+ Operation.BETWEEN: Operation.OUTSIDE,
70
+ Operation.OUTSIDE: Operation.BETWEEN,
71
+ }[op]
72
+
73
+
74
class Bin:
    """A single binarized predicate of the form ``feature <operation> value``."""

    def __init__(self, feature: Feature, operation: Operation, value: BinValue):
        self.feature = feature
        self.operation = operation
        self.value = value

    def negate_self(self):
        """Return a new ``Bin`` that is the logical negation of this one.

        Binary features are negated by switching to the other original value
        (keeping the same operation); all other features keep the value and
        negate the operation instead.
        """
        if isinstance(self.feature, Binary):
            vals = list(self.feature.value_mapping.keys())
            negated_value = vals[0] if vals[1] == self.value else vals[1]
            return Bin(self.feature, self.operation, negated_value)
        else:
            return Bin(self.feature, Operation.negated(self.operation), self.value)

    def evaluate(self, values: np.ndarray[int | float | str]) -> np.ndarray[bool]:
        """Element-wise truth value of this predicate over ``values``."""
        return Operation.perform(self.operation, values, self.value)

    def __repr__(self):
        return f"Bin({repr(self.feature)}, {repr(self.operation)}, {repr(self.value)})"

    def __str__(self):
        return f"{str(self.feature)} {self.operation.value} {str(self.value)}"

    def __eq__(self, other):
        # Fix: comparing a Bin to a non-Bin used to raise AttributeError
        # (accessing other.feature); returning NotImplemented lets Python
        # fall back to the other operand's comparison, as the protocol expects.
        if not isinstance(other, Bin):
            return NotImplemented
        return (
            self.feature == other.feature
            and self.operation == other.operation
            and self.value == other.value
        )
105
class Binarizer:
    """Handles binarizing the dataset.

    Wraps a :class:`DataHandler` and derives, for every input feature, a list
    of boolean ``Bin`` predicates and a parallel list of their negations:

    * ``Contiguous`` features are split into 10 equal-width ``BETWEEN`` bins
      spanning ``feature.bounds`` (the range is widened by a small epsilon so
      the maximum value falls inside the last bin); negations use ``OUTSIDE``.
    * ``Binary`` features get one ``EQ`` predicate for the value mapped to 1;
      the "negation" is ``EQ`` against the value mapped to 0.
    * ``Categorical`` features get one ``EQ`` predicate per original value;
      negations use ``NE``.
    * ``Mixed`` features are not supported yet.

    The target feature is reduced to a single positive predicate
    (``binarized_target``) and its complement (``binarized_target_neg``).
    """

    # TODO add specific options for binarization of categoricals (only positive and custom sets) and continuous (custom bins - i.e. quantiles)
    def __init__(
        self,
        data_handler: DataHandler,
        target_positive_vals: list[CategValue] | None = None,
    ):
        """Build per-feature bins and the target binarization.

        Parameters
        ----------
        data_handler : DataHandler
            Supplies the input features and the target feature.
        target_positive_vals : list of categorical values, optional
            Required when the target is ``Categorical``: values that form the
            positive class; every other original value becomes negative.

        Raises
        ------
        NotImplementedError
            For ``Mixed`` input features, or a target that is neither Binary
            nor Categorical-with-``target_positive_vals``.
        ValueError
            For an input feature of unsupported type.
        """
        self.__original_dhandler = data_handler

        binarized_features: list[list[Bin]] = []
        binarized_negations: list[list[Bin]] = []
        for feature in data_handler.features:
            if isinstance(feature, Contiguous):
                binarizations = []
                negations = []
                minval, maxval = feature.bounds
                # to make the last bin include the max value
                eps = (maxval - minval) / 10000
                n_bins = 10
                prev = minval
                # linspace yields n_bins+1 edges; skip the first so each
                # iteration sees the (prev, curr) pair of one interval.
                for curr in np.linspace(minval, maxval + eps, n_bins + 1)[1:]:
                    bounds = (prev, curr)
                    binarizations.append(Bin(feature, Operation.BETWEEN, bounds))
                    negations.append(Bin(feature, Operation.OUTSIDE, bounds))
                    prev = curr
                binarized_features.append(binarizations)
                binarized_negations.append(negations)
            elif isinstance(feature, Mixed):
                raise NotImplementedError("Mixed features are not yet implemented")
            elif isinstance(feature, Binary):
                # value_mapping maps original value -> {0, 1}; invert it to
                # recover the original value for each encoded side.
                inv_map = {i: v for v, i in feature.value_mapping.items()}
                binarized_features.append([Bin(feature, Operation.EQ, inv_map[1])])
                binarized_negations.append([Bin(feature, Operation.EQ, inv_map[0])])
            elif isinstance(feature, Categorical):
                binarizations = []
                negations = []
                for value in feature.orig_vals:
                    binarizations.append(Bin(feature, Operation.EQ, value))
                    negations.append(Bin(feature, Operation.NE, value))
                binarized_features.append(binarizations)
                binarized_negations.append(negations)
            else:
                raise ValueError("Unsupported feature type")

        # TARGET
        target = data_handler.target_feature
        if isinstance(target, Binary):
            inv_map = {i: v for v, i in target.value_mapping.items()}
            self.binarized_target = Bin(target, Operation.EQ, inv_map[1])
            self.binarized_target_neg = Bin(target, Operation.EQ, inv_map[0])
        elif isinstance(target, Categorical) and (target_positive_vals is not None):
            self.binarized_target = Bin(target, Operation.IN, target_positive_vals)
            negative_vals = [
                v for v in target.orig_vals if v not in target_positive_vals
            ]
            self.binarized_target_neg = Bin(target, Operation.IN, negative_vals)
        else:
            raise NotImplementedError(
                "Target feature must be Binary or Categorical with single binarization"
            )

        self.__binarized_features = binarized_features
        self.__binarized_negations = binarized_negations

    def encode(
        self, X: DataLike, include_negations=False, include_binary_negations=False
    ) -> np.ndarray[bool]:
        """Binarize the raw data matrix ``X`` into a boolean matrix.

        Produces one column per bin: first all bins of all features in order,
        then optionally all negations (``include_negations``) or, only when
        full negations are off, the negations of binary features alone
        (``include_binary_negations``).  Column order matches
        ``feature_names()`` called with the same flags.
        """
        if isinstance(X, pd.DataFrame):
            X = X.values

        values = []
        for i, binariaztions in enumerate(self.__binarized_features):
            for bin in binariaztions:
                # X[:, [i]] keeps a (n_samples, 1) column so hstack below
                # produces a 2-D (n_samples, n_bins) matrix.
                values.append(Operation.perform(bin.operation, X[:, [i]], bin.value))
        if include_negations:
            for i, binariaztions in enumerate(self.__binarized_negations):
                for bin in binariaztions:
                    values.append(
                        Operation.perform(bin.operation, X[:, [i]], bin.value)
                    )
        elif include_binary_negations:
            for i, binariaztions in enumerate(self.__binarized_negations):
                for bin in binariaztions:
                    if isinstance(bin.feature, Binary):
                        values.append(
                            Operation.perform(bin.operation, X[:, [i]], bin.value)
                        )
        return np.hstack(values)

    def encode_y(self, y: OneDimData) -> np.ndarray[bool]:
        """Binarize the target vector: True where ``binarized_target`` holds."""
        if isinstance(y, pd.Series):
            y = y.values
        res = Operation.perform(
            self.binarized_target.operation, y, self.binarized_target.value
        )
        return res.flatten()

    def __feature_name_tuples(self, include_negations, include_binary_negations):
        """(feature, operation, value) triples in the same order as ``encode`` columns."""
        names = []
        if include_negations:
            feats = self.__binarized_features + self.__binarized_negations
        else:
            feats = [f for f in self.__binarized_features]
            # Binary-only negations apply only when full negations are off,
            # mirroring the if/elif structure in encode().
            if include_binary_negations:
                for binarization in self.__binarized_negations:
                    if isinstance(binarization[0].feature, Binary):
                        feats.append(binarization)
        for binarization in feats:
            for bin in binarization:
                names.append((bin.feature.name, bin.operation.value, str(bin.value)))
        return names

    def feature_names(
        self, include_negations=False, include_binary_negations=False
    ) -> list[str]:
        """Human-readable column names ("feature op value") for ``encode`` output."""
        return [
            f"{feat} {op} {val}"
            for (feat, op, val) in self.__feature_name_tuples(
                include_negations, include_binary_negations
            )
        ]

    def target_name(self) -> tuple[str, str]:
        """Return the (positive, negative) target predicate descriptions."""
        bin = self.binarized_target
        positive = f"{bin.feature} {bin.operation.value} {bin.value}"
        bin = self.binarized_target_neg
        negative = f"{bin.feature} {bin.operation.value} {bin.value}"
        return positive, negative

    def multi_index_feats(
        self, include_negations=False, include_binary_negations=False
    ) -> pd.MultiIndex:
        """Column names as a pandas MultiIndex (feature, operation, value)."""
        return pd.MultiIndex.from_tuples(
            self.__feature_name_tuples(include_negations, include_binary_negations),
            names=["feature", "operation", "value"],
        )

    def get_bin_encodings(
        self, include_negations=False, include_binary_negations=False, return_flat=True
    ):
        """Return the ``Bin`` objects backing ``encode``'s columns.

        With ``return_flat`` (default) a flat list in column order; otherwise
        a list of per-feature lists.
        """
        if include_negations:
            feats = self.__binarized_features + self.__binarized_negations
        else:
            feats = [f for f in self.__binarized_features]
            # Same precedence as encode(): binary-only negations are ignored
            # when full negations are requested.
            if include_binary_negations:
                for binarization in self.__binarized_negations:
                    if isinstance(binarization[0].feature, Binary):
                        feats.append(binarization)
        if not return_flat:
            return feats
        flat = []
        for binariaztions in feats:
            for bin in binariaztions:
                flat.append(bin)
        return flat

    @property
    def data_handler(self):
        """The wrapped, unmodified :class:`DataHandler`."""
        return self.__original_dhandler
@@ -0,0 +1,3 @@
"""Binarizer subpackage: re-exports the binarization primitives."""

from .Binarizer import Bin, Binarizer, Operation

__all__ = ["Bin", "Binarizer", "Operation"]
@@ -0,0 +1,351 @@
1
+ from __future__ import annotations
2
+
3
+ import numpy as np
4
+ import pandas as pd
5
+
6
+ from .features import (
7
+ Binary,
8
+ Categorical,
9
+ Contiguous,
10
+ Feature,
11
+ Monotonicity,
12
+ make_feature,
13
+ )
14
+ from .types import CategValue, DataLike, FeatureID, OneDimData
15
+
16
+
17
class DataHandler:
    """
    Performs all data processing from a pandas DataFrame/numpy array to a normalized and encoded input.
    Expected use is to initialize this with training data and then use it to encode all data.
    Supports mixed encoding, where only some values are categorical.
    Normalizes contiguous data to [0, 1] range.
    Produces either one-hot encoded data or direct data with mapped categorical data to negative integers.
    """

    def __init__(
        self,
        features: list[Feature],
        target: Feature | None = None,
        causal_inc: list[tuple[Feature, Feature]] | None = None,
        greater_than: list[tuple[Feature, Feature]] | None = None,
    ):
        """Store features, target, and pairwise constraints.

        ``causal_inc`` holds (cause, effect) pairs: an increase of the cause
        must be accompanied by an increase of the effect (see
        ``allowed_changes``).  ``greater_than`` holds (greater, smaller)
        pairs whose ordering must hold after a change.
        """
        self.__input_features = features
        self.__target_feature = target
        self.__causal_inc = causal_inc if causal_inc is not None else []
        self.__greater_than = greater_than if greater_than is not None else []

    # NOTE(review): the mutable default arguments ({} / []) below are shared
    # across calls; they are only read here, so this is benign, but defaulting
    # to None would be safer.
    @classmethod
    def from_data(
        cls,
        X: DataLike,
        y: OneDimData | None = None,
        categ_map: dict[FeatureID, list[CategValue]] = {},
        ordered: list[FeatureID] = [],
        bounds_map: dict[FeatureID, tuple[int, int]] = {},
        discrete: list[FeatureID] = [],
        immutable: list[FeatureID] = [],
        monotonicity: dict[FeatureID, Monotonicity] = {},
        # TODO more general causality
        causal_inc: list[tuple[FeatureID, FeatureID]] = [],
        greater_than: list[tuple[FeatureID, FeatureID]] = [],
        regression: bool = False,
        feature_names: list[str] | None = None,
        target_name: str | None = None,
    ) -> DataHandler:
        """
        Construct a DataHandler instance.

        Parameters:
        -----------
        X : array-like (2 dimensional)
            Input features. Shape: (num_samples, num_features)
        y : array-like (1 dimensional)
            Target feature (e.g., labels or regression targets). Shape: (num_samples,)
        categ_map : dictionary
            Dictionary with indices (or column names for DataFrame) of categorical features as keys
            and a list of unique categorical values as values.

            If the list is empty, each unique value of the feature is considered categorical
            If the list is non-empty, but does not cover all values, the feature is considered mixed
        ordered : list
            Feature identifiers whose categorical values are ordered.
        bounds_map : dictionary
            Maps feature identifiers to explicit (min, max) bounds.
        discrete : list
            Feature identifiers that take discrete values.
        immutable : list
            Feature identifiers that must not be modified.
        monotonicity : dictionary
            Maps feature identifiers to a ``Monotonicity`` direction.
        causal_inc : list of pairs
            (cause, effect) feature identifiers: increasing the cause requires
            increasing the effect.
        greater_than : list of pairs
            (greater, smaller) feature identifiers whose ordering must hold.
        regression : bool
            True if the task is regression, False if y is categorical and task is classification.
        feature_names : optional list of strings
            List of feature names, if None it is recovered from column names if X is a DataFrame
        target_name : optional string
            Name of the target feature, if None it is recovered from X if X is a pandas Series
        """
        if isinstance(X, pd.DataFrame):
            if feature_names is None:
                feature_names = X.columns
            # When only the target's column name is given, split it off X.
            if target_name is not None and y is None:
                print("Taking target values from the X matrix")
                y = X[target_name]
                X = X.drop(columns=target_name)
            X = X.to_numpy()

        if y is not None:
            if target_name is None:
                if isinstance(y, pd.Series):
                    target_name = y.name
                else:
                    target_name = "target"

            if regression:
                target_feature = Contiguous(y, target_name)
            else:
                # More than two classes -> Categorical, otherwise Binary.
                if len(np.unique(y)) > 2:
                    target_feature = Categorical(y, name=target_name)
                else:
                    target_feature = Binary(y, name=target_name)
                # TODO make the target values specifiable
        else:
            target_feature = None

        n_features = X.shape[1]
        if feature_names is None:
            feature_names = [None] * n_features
        if len(feature_names) != n_features:
            raise ValueError("Incorrect length of list of feature names.")

        input_features: list[Feature] = []
        # stores lists of categorical values of applicable features, used for mapping to integer values
        for feat_i, feat_name in enumerate(feature_names):
            input_features.append(
                make_feature(
                    X[:, feat_i],
                    feat_name,
                    categ_map.get(feat_name, None),
                    bounds_map.get(feat_name, None),
                    feat_name in ordered,
                    feat_name in discrete,
                    monotone=monotonicity.get(feat_name, Monotonicity.NONE),
                    modifiable=feat_name not in immutable,
                )
            )

        # Map identifier pairs to Feature-object pairs.
        # NOTE(review): when X was a DataFrame, feature_names is a pd.Index,
        # which has no list-style .index() method — confirm these constraint
        # lists are only used with a plain list of names.
        causal_inc = [
            (
                input_features[feature_names.index(i)],
                input_features[feature_names.index(j)],
            )
            for i, j in causal_inc
        ]
        greater_than = [
            (
                input_features[feature_names.index(i)],
                input_features[feature_names.index(j)],
            )
            for i, j in greater_than
        ]
        return DataHandler(input_features, target_feature, causal_inc, greater_than)

    @property
    def causal_inc(self) -> list[tuple[Feature, Feature]]:
        """(cause, effect) pairs constraining joint increases."""
        return self.__causal_inc

    @property
    def greater_than(self) -> list[tuple[Feature, Feature]]:
        """(greater, smaller) pairs whose ordering must hold."""
        return self.__greater_than

    @property
    def n_features(self) -> int:
        """Number of features in the input space"""
        return len(self.__input_features)

    @property
    def features(self) -> list[Feature]:
        """List of input features"""
        return self.__input_features

    @property
    def target_feature(self) -> Feature:
        """Target feature"""
        return self.__target_feature

    @property
    def feature_names(self) -> list[str]:
        """List of feature names"""
        return [f.name for f in self.__input_features]

    def encode(
        self, X: DataLike, normalize: bool = True, one_hot: bool = True
    ) -> np.ndarray[np.float64]:
        """
        Encode input features.

        Parameters:
        -----------
        X : array-like
            Input features (data matrix or DataFrame). Shape: (num_samples, num_features)
        normalize : bool, optional
            Whether to normalize the features (default is True).
        one_hot : bool, optional
            Whether to perform one-hot encoding for categorical values (default is True).

        Returns:
        --------
        encoded_X : numpy array
            Encoded input features. Shape: (num_samples, one_hot_features) when one hot encoding is performed, (num_samples, num_features) otherwise
        """
        if isinstance(X, pd.DataFrame):
            X = X.to_numpy()
        if isinstance(X, pd.Series):
            X = X.to_numpy()

        # A single sample: encode it as a 1-row matrix, return the row.
        if len(X.shape) == 1:
            Xmat = X.reshape(1, -1)
            return self.encode(Xmat, normalize=normalize, one_hot=one_hot)[0]

        enc = []
        for feat_i, feature in enumerate(self.__input_features):
            enc.append(
                feature.encode(X[:, feat_i], normalize, one_hot).reshape(X.shape[0], -1)
            )

        return np.concatenate(enc, axis=1).astype(np.float64)

    def encode_y(
        self, y: OneDimData, normalize: bool = True, one_hot: bool = True
    ) -> np.ndarray[np.float64]:
        """
        Encode target feature.

        Parameters:
        -----------
        y : array-like
            Target feature (data matrix or DataFrame of labels or regression targets). Shape: (num_samples,)
        normalize : bool, optional
            Whether to normalize the features (default is True).
        one_hot : bool, optional
            Whether to perform one-hot encoding for categorical values (default is True).

        Returns:
        --------
        encoded_y : numpy array
            Encoded target feature. Shape: (num_samples, num_values) for one hot encoding or (num_samples,) otherwise
        """
        return self.__target_feature.encode(y, normalize, one_hot)

    def encode_all(self, X_all: np.ndarray, normalize: bool, one_hot: bool):
        """Encode a matrix whose last column is the target; returns features and target side by side."""
        return np.concatenate(
            [
                self.encode(X_all[:, :-1], normalize, one_hot),
                self.encode_y(X_all[:, -1], normalize, one_hot).reshape(-1, 1),
            ],
            axis=1,
        )

    def decode(
        self,
        X: np.ndarray[np.float64],
        denormalize: bool = True,
        encoded_one_hot: bool = True,
        as_dataframe: bool = True,
    ) -> np.ndarray[np.float64]:
        """
        Decode input features.

        Parameters:
        -----------
        X : array-like
            Input data matrix. Shape: (num_samples, num_enc_features)
            where num_enc_features can be higher than num_features, because of one-hot encoding
        denormalize : bool, optional
            Whether to invert the normalization of the features (default is True).
        encoded_one_hot : bool, optional
            Whether the input matrix is one-hot encoded (default is True).
        as_dataframe : bool, optional
            Whether to return a pandas DataFrame or numpy array (default is True - DataFrame).

        Returns:
        --------
        decoded_X : numpy array
            Decoded features in the original format. Shape: (num_samples, num_features)
        """
        if X.shape[0] == 0:
            if as_dataframe:
                return pd.DataFrame([], columns=[f.name for f in self.__input_features])
            return np.empty((0, self.n_features))
        dec = []
        curr_col = 0
        # Walk the encoded columns feature by feature, consuming each
        # feature's encoding width.
        for feature in self.__input_features:
            w = feature.encoding_width(encoded_one_hot)
            dec.append(
                feature.decode(X[:, curr_col : curr_col + w], denormalize, as_dataframe)
            )
            curr_col += w
        if as_dataframe:
            return pd.concat(dec, axis=1)
        return np.concatenate([x.reshape(X.shape[0], -1) for x in dec], axis=1)

    def decode_y(
        self,
        y: np.ndarray[np.float64],
        denormalize: bool = True,
        as_series: bool = True,
    ) -> np.ndarray[np.float64]:
        """
        Decode target feature.

        Parameters:
        -----------
        y : array-like
            Target feature data. Shape: (num_samples,) for general case
            or (num_samples, num_categorical_values) in case of one-hot encoding
        denormalize : bool, optional
            Whether to invert the normalization of the feature (default is True).
        as_series : bool, optional
            Whether to return a pandas Series or numpy array (default is True - Series).

        Returns:
        --------
        decoded_y : numpy array
            Decoded target feature data. Shape: (num_samples,)
        """
        return self.__target_feature.decode(y, denormalize, as_series)

    def encoding_width(self, one_hot: bool) -> int:
        """Total number of encoded columns across all input features."""
        return sum([f.encoding_width(one_hot) for f in self.__input_features])

    def allowed_changes(self, pre_vals, post_vals):
        """Return True iff changing a sample from ``pre_vals`` to ``post_vals``
        violates no constraint: per-feature ``allowed_change``, every
        causal-increase pair, and every greater-than pair."""
        for f, pre, pos in zip(self.features, pre_vals, post_vals):
            if not f.allowed_change(pre, pos):
                return False

        for cause, effect in self.__causal_inc:
            cause_i = self.features.index(cause)
            pre_cause = cause.encode(pre_vals[cause_i], normalize=False, one_hot=False)
            pos_cause = cause.encode(post_vals[cause_i], normalize=False, one_hot=False)
            # Did the cause increase?
            if isinstance(cause, Categorical):
                applied = pos_cause in cause.greater_than(pre_cause)
            elif isinstance(cause, Contiguous):
                applied = pos_cause > pre_cause
            else:
                raise ValueError("invalid feature type")
            if applied:
                # Then the effect must have increased as well.
                effect_i = self.features.index(effect)
                pre_effect = effect.encode(
                    pre_vals[effect_i], normalize=False, one_hot=False
                )
                pos_effect = effect.encode(
                    post_vals[effect_i], normalize=False, one_hot=False
                )
                if isinstance(effect, Categorical):
                    if pos_effect not in effect.greater_than(pre_effect):
                        return False
                elif isinstance(effect, Contiguous):
                    if pos_effect <= pre_effect:
                        return False
                else:
                    raise ValueError("invalid feature type")

        for greater, smaller in self.__greater_than:
            if (
                post_vals[self.features.index(smaller)]
                > post_vals[self.features.index(greater)]
            ):
                return False
        return True

    # TODO: another layer on top - a data wrapper that remembers whether one-hot encoding, normalization, etc. were applied
@@ -0,0 +1,3 @@
"""Data-handler subpackage: re-exports the :class:`DataHandler` facade."""

from .DataHandler import DataHandler

__all__ = ["DataHandler"]