aplr 10.8.0__cp38-cp38-macosx_11_0_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of aplr might be problematic. Click here for more details.
- aplr/__init__.py +1 -0
- aplr/aplr.py +600 -0
- aplr-10.8.0.dist-info/LICENSE +21 -0
- aplr-10.8.0.dist-info/METADATA +47 -0
- aplr-10.8.0.dist-info/RECORD +8 -0
- aplr-10.8.0.dist-info/WHEEL +5 -0
- aplr-10.8.0.dist-info/top_level.txt +2 -0
- aplr_cpp.cpython-38-darwin.so +0 -0
aplr/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from .aplr import *
|
aplr/aplr.py
ADDED
|
@@ -0,0 +1,600 @@
|
|
|
1
|
+
from typing import List, Callable, Optional, Dict, Union
|
|
2
|
+
import numpy as np
|
|
3
|
+
import aplr_cpp
|
|
4
|
+
import itertools
|
|
5
|
+
|
|
6
|
+
# Readability aliases for signatures in this module. All four names are bound
# to the very same class, numpy.ndarray; the distinct names only document the
# intent at each call site and are not enforced in any way.
FloatVector = FloatMatrix = np.ndarray
IntVector = IntMatrix = np.ndarray
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class APLRRegressor:
    """Python front end for the ``aplr_cpp.APLRRegressor`` extension object.

    Constructor arguments are stored unchanged as attributes of the same name
    and mirrored onto the wrapped extension object (``self.APLRRegressor``).
    All remaining methods forward their arguments to that object and return
    whatever it returns.
    """

    def __init__(
        self,
        m: int = 3000,
        v: float = 0.5,
        random_state: int = 0,
        loss_function: str = "mse",
        link_function: str = "identity",
        n_jobs: int = 0,
        cv_folds: int = 5,
        bins: int = 300,
        max_interaction_level: int = 1,
        max_interactions: int = 100000,
        min_observations_in_split: int = 4,
        ineligible_boosting_steps_added: int = 15,
        max_eligible_terms: int = 7,
        verbosity: int = 0,
        dispersion_parameter: float = 1.5,
        validation_tuning_metric: str = "default",
        quantile: float = 0.5,
        calculate_custom_validation_error_function: Optional[
            Callable[
                [
                    FloatVector,
                    FloatVector,
                    FloatVector,
                    FloatVector,
                    FloatMatrix,
                ],
                float,
            ]
        ] = None,
        calculate_custom_loss_function: Optional[
            Callable[
                [
                    FloatVector,
                    FloatVector,
                    FloatVector,
                    FloatVector,
                    FloatMatrix,
                ],
                float,
            ]
        ] = None,
        calculate_custom_negative_gradient_function: Optional[
            Callable[
                [FloatVector, FloatVector, FloatVector, FloatMatrix],
                FloatVector,
            ]
        ] = None,
        calculate_custom_transform_linear_predictor_to_predictions_function: Optional[
            Callable[[FloatVector], FloatVector]
        ] = None,
        calculate_custom_differentiate_predictions_wrt_linear_predictor_function: Optional[
            Callable[[FloatVector], FloatVector]
        ] = None,
        boosting_steps_before_interactions_are_allowed: int = 0,
        monotonic_constraints_ignore_interactions: bool = False,
        group_mse_by_prediction_bins: int = 10,
        group_mse_cycle_min_obs_in_bin: int = 30,
        early_stopping_rounds: int = 500,
        num_first_steps_with_linear_effects_only: int = 0,
        penalty_for_non_linearity: float = 0.0,
        penalty_for_interactions: float = 0.0,
        max_terms: int = 0,
    ):
        """Store every hyperparameter and create the wrapped extension object."""
        # Scalar settings.
        self.m = m
        self.v = v
        self.random_state = random_state
        self.loss_function = loss_function
        self.link_function = link_function
        self.n_jobs = n_jobs
        self.cv_folds = cv_folds
        self.bins = bins
        self.max_interaction_level = max_interaction_level
        self.max_interactions = max_interactions
        self.min_observations_in_split = min_observations_in_split
        self.ineligible_boosting_steps_added = ineligible_boosting_steps_added
        self.max_eligible_terms = max_eligible_terms
        self.verbosity = verbosity
        self.dispersion_parameter = dispersion_parameter
        self.validation_tuning_metric = validation_tuning_metric
        self.quantile = quantile

        # Optional user-supplied callables (None when not provided).
        self.calculate_custom_validation_error_function = calculate_custom_validation_error_function
        self.calculate_custom_loss_function = calculate_custom_loss_function
        self.calculate_custom_negative_gradient_function = calculate_custom_negative_gradient_function
        self.calculate_custom_transform_linear_predictor_to_predictions_function = calculate_custom_transform_linear_predictor_to_predictions_function
        self.calculate_custom_differentiate_predictions_wrt_linear_predictor_function = calculate_custom_differentiate_predictions_wrt_linear_predictor_function

        # Remaining settings.
        self.boosting_steps_before_interactions_are_allowed = boosting_steps_before_interactions_are_allowed
        self.monotonic_constraints_ignore_interactions = monotonic_constraints_ignore_interactions
        self.group_mse_by_prediction_bins = group_mse_by_prediction_bins
        self.group_mse_cycle_min_obs_in_bin = group_mse_cycle_min_obs_in_bin
        self.early_stopping_rounds = early_stopping_rounds
        self.num_first_steps_with_linear_effects_only = num_first_steps_with_linear_effects_only
        self.penalty_for_non_linearity = penalty_for_non_linearity
        self.penalty_for_interactions = penalty_for_interactions
        self.max_terms = max_terms

        # Create the extension object and hand it a copy of every setting.
        self.APLRRegressor = aplr_cpp.APLRRegressor()
        self.__set_params_cpp()

    def __set_params_cpp(self):
        """Copy every hyperparameter from this wrapper onto the extension object."""
        cpp_model = self.APLRRegressor
        # get_params() lists exactly the attributes that are mirrored.
        for name, value in self.get_params().items():
            setattr(cpp_model, name, value)

    def fit(
        self,
        X: FloatMatrix,
        y: FloatVector,
        sample_weight: FloatVector = np.empty(0),
        X_names: List[str] = [],
        cv_observations: IntMatrix = np.empty([0, 0]),
        prioritized_predictors_indexes: List[int] = [],
        monotonic_constraints: List[int] = [],
        group: FloatVector = np.empty(0),
        interaction_constraints: List[List[int]] = [],
        other_data: FloatMatrix = np.empty([0, 0]),
        predictor_learning_rates: List[float] = [],
        predictor_penalties_for_non_linearity: List[float] = [],
        predictor_penalties_for_interactions: List[float] = [],
        predictor_min_observations_in_split: List[int] = [],
    ):
        """Re-sync the hyperparameters, then forward all arguments to the
        extension object's ``fit`` in positional order."""
        # Attributes may have been changed directly since construction.
        self.__set_params_cpp()
        positional_arguments = (
            X,
            y,
            sample_weight,
            X_names,
            cv_observations,
            prioritized_predictors_indexes,
            monotonic_constraints,
            group,
            interaction_constraints,
            other_data,
            predictor_learning_rates,
            predictor_penalties_for_non_linearity,
            predictor_penalties_for_interactions,
            predictor_min_observations_in_split,
        )
        self.APLRRegressor.fit(*positional_arguments)

    def predict(
        self, X: FloatMatrix, cap_predictions_to_minmax_in_training: bool = True
    ) -> FloatVector:
        """Return the extension object's predictions for ``X``.

        When ``link_function`` is ``"custom_function"`` the custom transform
        callable is assigned to the extension object again before predicting.
        """
        uses_custom_link = self.link_function == "custom_function"
        if uses_custom_link:
            transform = self.calculate_custom_transform_linear_predictor_to_predictions_function
            self.APLRRegressor.calculate_custom_transform_linear_predictor_to_predictions_function = transform
        return self.APLRRegressor.predict(X, cap_predictions_to_minmax_in_training)

    def set_term_names(self, X_names: List[str]):
        """Forward ``X_names`` to the extension object's ``set_term_names``."""
        self.APLRRegressor.set_term_names(X_names)

    def calculate_feature_importance(
        self, X: FloatMatrix, sample_weight: FloatVector = np.empty(0)
    ) -> FloatVector:
        """Forward to the extension object's ``calculate_feature_importance``."""
        return self.APLRRegressor.calculate_feature_importance(X, sample_weight)

    def calculate_term_importance(
        self, X: FloatMatrix, sample_weight: FloatVector = np.empty(0)
    ) -> FloatVector:
        """Forward to the extension object's ``calculate_term_importance``."""
        return self.APLRRegressor.calculate_term_importance(X, sample_weight)

    def calculate_local_feature_contribution(self, X: FloatMatrix) -> FloatMatrix:
        """Forward to the extension object's ``calculate_local_feature_contribution``."""
        return self.APLRRegressor.calculate_local_feature_contribution(X)

    def calculate_local_term_contribution(self, X: FloatMatrix) -> FloatMatrix:
        """Forward to the extension object's ``calculate_local_term_contribution``."""
        return self.APLRRegressor.calculate_local_term_contribution(X)

    def calculate_local_contribution_from_selected_terms(
        self, X: FloatMatrix, predictor_indexes: List[int]
    ) -> FloatVector:
        """Forward to the extension object's
        ``calculate_local_contribution_from_selected_terms``."""
        cpp_model = self.APLRRegressor
        return cpp_model.calculate_local_contribution_from_selected_terms(
            X, predictor_indexes
        )

    def calculate_terms(self, X: FloatMatrix) -> FloatMatrix:
        """Forward to the extension object's ``calculate_terms``."""
        return self.APLRRegressor.calculate_terms(X)

    def get_term_names(self) -> List[str]:
        """Return the extension object's ``get_term_names()`` result."""
        return self.APLRRegressor.get_term_names()

    def get_term_affiliations(self) -> List[str]:
        """Return the extension object's ``get_term_affiliations()`` result."""
        return self.APLRRegressor.get_term_affiliations()

    def get_unique_term_affiliations(self) -> List[str]:
        """Return the extension object's ``get_unique_term_affiliations()`` result."""
        return self.APLRRegressor.get_unique_term_affiliations()

    def get_base_predictors_in_each_unique_term_affiliation(self) -> List[List[int]]:
        """Return the extension object's
        ``get_base_predictors_in_each_unique_term_affiliation()`` result."""
        return self.APLRRegressor.get_base_predictors_in_each_unique_term_affiliation()

    def get_term_coefficients(self) -> FloatVector:
        """Return the extension object's ``get_term_coefficients()`` result."""
        return self.APLRRegressor.get_term_coefficients()

    def get_validation_error_steps(self) -> FloatMatrix:
        """Return the extension object's ``get_validation_error_steps()`` result."""
        return self.APLRRegressor.get_validation_error_steps()

    def get_feature_importance(self) -> FloatVector:
        """Return the extension object's ``get_feature_importance()`` result."""
        return self.APLRRegressor.get_feature_importance()

    def get_term_importance(self) -> FloatVector:
        """Return the extension object's ``get_term_importance()`` result."""
        return self.APLRRegressor.get_term_importance()

    def get_term_main_predictor_indexes(self) -> IntVector:
        """Return the extension object's ``get_term_main_predictor_indexes()`` result."""
        return self.APLRRegressor.get_term_main_predictor_indexes()

    def get_term_interaction_levels(self) -> IntVector:
        """Return the extension object's ``get_term_interaction_levels()`` result."""
        return self.APLRRegressor.get_term_interaction_levels()

    def get_intercept(self) -> float:
        """Return the extension object's ``get_intercept()`` result."""
        return self.APLRRegressor.get_intercept()

    def get_optimal_m(self) -> int:
        """Return the extension object's ``get_optimal_m()`` result."""
        return self.APLRRegressor.get_optimal_m()

    def get_validation_tuning_metric(self) -> str:
        """Return the extension object's ``get_validation_tuning_metric()`` result."""
        return self.APLRRegressor.get_validation_tuning_metric()

    def get_main_effect_shape(self, predictor_index: int) -> Dict[float, float]:
        """Forward to the extension object's ``get_main_effect_shape``."""
        return self.APLRRegressor.get_main_effect_shape(predictor_index)

    def get_unique_term_affiliation_shape(
        self, unique_term_affiliation: str, max_rows_before_sampling: int = 100000
    ) -> FloatMatrix:
        """Forward to the extension object's ``get_unique_term_affiliation_shape``."""
        cpp_model = self.APLRRegressor
        return cpp_model.get_unique_term_affiliation_shape(
            unique_term_affiliation, max_rows_before_sampling
        )

    def get_cv_error(self) -> float:
        """Return the extension object's ``get_cv_error()`` result."""
        return self.APLRRegressor.get_cv_error()

    def set_intercept(self, value: float):
        """Forward ``value`` to the extension object's ``set_intercept``."""
        self.APLRRegressor.set_intercept(value)

    # For sklearn
    def get_params(self, deep=True):
        """Return a dict mapping each hyperparameter name to its current value.

        ``deep`` is accepted for scikit-learn compatibility and is not used.
        """
        # Key order matters to callers that iterate the dict; keep it fixed.
        ordered_names = (
            "m",
            "v",
            "random_state",
            "loss_function",
            "link_function",
            "n_jobs",
            "cv_folds",
            "bins",
            "max_interaction_level",
            "max_interactions",
            "verbosity",
            "min_observations_in_split",
            "ineligible_boosting_steps_added",
            "max_eligible_terms",
            "dispersion_parameter",
            "validation_tuning_metric",
            "quantile",
            "calculate_custom_validation_error_function",
            "calculate_custom_loss_function",
            "calculate_custom_negative_gradient_function",
            "calculate_custom_transform_linear_predictor_to_predictions_function",
            "calculate_custom_differentiate_predictions_wrt_linear_predictor_function",
            "boosting_steps_before_interactions_are_allowed",
            "monotonic_constraints_ignore_interactions",
            "group_mse_by_prediction_bins",
            "group_mse_cycle_min_obs_in_bin",
            "early_stopping_rounds",
            "num_first_steps_with_linear_effects_only",
            "penalty_for_non_linearity",
            "penalty_for_interactions",
            "max_terms",
        )
        return {name: getattr(self, name) for name in ordered_names}

    # For sklearn
    def set_params(self, **parameters):
        """Assign each keyword as an attribute, re-sync the extension object,
        and return ``self``."""
        for parameter in parameters:
            setattr(self, parameter, parameters[parameter])
        self.__set_params_cpp()
        return self
|
|
354
|
+
|
|
355
|
+
|
|
356
|
+
class APLRClassifier:
    """Python front end for the ``aplr_cpp.APLRClassifier`` extension object.

    Constructor arguments are stored unchanged as attributes of the same name
    and mirrored onto the wrapped extension object (``self.APLRClassifier``).
    All remaining methods forward their arguments to that object and return
    whatever it returns.
    """

    def __init__(
        self,
        m: int = 3000,
        v: float = 0.5,
        random_state: int = 0,
        n_jobs: int = 0,
        cv_folds: int = 5,
        bins: int = 300,
        verbosity: int = 0,
        max_interaction_level: int = 1,
        max_interactions: int = 100000,
        min_observations_in_split: int = 4,
        ineligible_boosting_steps_added: int = 15,
        max_eligible_terms: int = 7,
        boosting_steps_before_interactions_are_allowed: int = 0,
        monotonic_constraints_ignore_interactions: bool = False,
        early_stopping_rounds: int = 500,
        num_first_steps_with_linear_effects_only: int = 0,
        penalty_for_non_linearity: float = 0.0,
        penalty_for_interactions: float = 0.0,
        max_terms: int = 0,
    ):
        """Store every hyperparameter and create the wrapped extension object."""
        self.m = m
        self.v = v
        self.random_state = random_state
        self.n_jobs = n_jobs
        self.cv_folds = cv_folds
        self.bins = bins
        self.verbosity = verbosity
        self.max_interaction_level = max_interaction_level
        self.max_interactions = max_interactions
        self.min_observations_in_split = min_observations_in_split
        self.ineligible_boosting_steps_added = ineligible_boosting_steps_added
        self.max_eligible_terms = max_eligible_terms
        self.boosting_steps_before_interactions_are_allowed = boosting_steps_before_interactions_are_allowed
        self.monotonic_constraints_ignore_interactions = monotonic_constraints_ignore_interactions
        self.early_stopping_rounds = early_stopping_rounds
        self.num_first_steps_with_linear_effects_only = num_first_steps_with_linear_effects_only
        self.penalty_for_non_linearity = penalty_for_non_linearity
        self.penalty_for_interactions = penalty_for_interactions
        self.max_terms = max_terms

        # Create the extension object and hand it a copy of every setting.
        self.APLRClassifier = aplr_cpp.APLRClassifier()
        self.__set_params_cpp()

    def __set_params_cpp(self):
        """Copy every hyperparameter from this wrapper onto the extension object."""
        cpp_model = self.APLRClassifier
        # get_params() lists exactly the attributes that are mirrored.
        for name, value in self.get_params().items():
            setattr(cpp_model, name, value)

    def fit(
        self,
        X: FloatMatrix,
        y: List[str],
        sample_weight: FloatVector = np.empty(0),
        X_names: List[str] = [],
        cv_observations: IntMatrix = np.empty([0, 0]),
        prioritized_predictors_indexes: List[int] = [],
        monotonic_constraints: List[int] = [],
        interaction_constraints: List[List[int]] = [],
        predictor_learning_rates: List[float] = [],
        predictor_penalties_for_non_linearity: List[float] = [],
        predictor_penalties_for_interactions: List[float] = [],
        predictor_min_observations_in_split: List[int] = [],
    ):
        """Re-sync the hyperparameters, forward all arguments to the extension
        object's ``fit`` in positional order, then set ``classes_``."""
        # Attributes may have been changed directly since construction.
        self.__set_params_cpp()
        positional_arguments = (
            X,
            y,
            sample_weight,
            X_names,
            cv_observations,
            prioritized_predictors_indexes,
            monotonic_constraints,
            interaction_constraints,
            predictor_learning_rates,
            predictor_penalties_for_non_linearity,
            predictor_penalties_for_interactions,
            predictor_min_observations_in_split,
        )
        self.APLRClassifier.fit(*positional_arguments)
        # For sklearn: classes_ holds 0..k-1, one integer per fitted category.
        number_of_categories = len(self.APLRClassifier.get_categories())
        self.classes_ = np.arange(number_of_categories)

    def predict_class_probabilities(
        self, X: FloatMatrix, cap_predictions_to_minmax_in_training: bool = False
    ) -> FloatMatrix:
        """Forward to the extension object's ``predict_class_probabilities``."""
        cpp_model = self.APLRClassifier
        return cpp_model.predict_class_probabilities(
            X, cap_predictions_to_minmax_in_training
        )

    def predict(
        self, X: FloatMatrix, cap_predictions_to_minmax_in_training: bool = False
    ) -> List[str]:
        """Forward to the extension object's ``predict``."""
        return self.APLRClassifier.predict(X, cap_predictions_to_minmax_in_training)

    def calculate_local_feature_contribution(self, X: FloatMatrix) -> FloatMatrix:
        """Forward to the extension object's ``calculate_local_feature_contribution``."""
        return self.APLRClassifier.calculate_local_feature_contribution(X)

    def get_categories(self) -> List[str]:
        """Return the extension object's ``get_categories()`` result."""
        return self.APLRClassifier.get_categories()

    def get_logit_model(self, category: str) -> APLRRegressor:
        """Forward ``category`` to the extension object's ``get_logit_model``."""
        return self.APLRClassifier.get_logit_model(category)

    def get_validation_error_steps(self) -> FloatMatrix:
        """Return the extension object's ``get_validation_error_steps()`` result."""
        return self.APLRClassifier.get_validation_error_steps()

    def get_cv_error(self) -> float:
        """Return the extension object's ``get_cv_error()`` result."""
        return self.APLRClassifier.get_cv_error()

    def get_feature_importance(self) -> FloatVector:
        """Return the extension object's ``get_feature_importance()`` result."""
        return self.APLRClassifier.get_feature_importance()

    def get_unique_term_affiliations(self) -> List[str]:
        """Return the extension object's ``get_unique_term_affiliations()`` result."""
        return self.APLRClassifier.get_unique_term_affiliations()

    def get_base_predictors_in_each_unique_term_affiliation(self) -> List[List[int]]:
        """Return the extension object's
        ``get_base_predictors_in_each_unique_term_affiliation()`` result."""
        return self.APLRClassifier.get_base_predictors_in_each_unique_term_affiliation()

    # For sklearn
    def get_params(self, deep=True):
        """Return a dict mapping each hyperparameter name to its current value.

        ``deep`` is accepted for scikit-learn compatibility and is not used.
        """
        # Key order matters to callers that iterate the dict; keep it fixed.
        ordered_names = (
            "m",
            "v",
            "random_state",
            "n_jobs",
            "cv_folds",
            "bins",
            "verbosity",
            "max_interaction_level",
            "max_interactions",
            "min_observations_in_split",
            "ineligible_boosting_steps_added",
            "max_eligible_terms",
            "boosting_steps_before_interactions_are_allowed",
            "monotonic_constraints_ignore_interactions",
            "early_stopping_rounds",
            "num_first_steps_with_linear_effects_only",
            "penalty_for_non_linearity",
            "penalty_for_interactions",
            "max_terms",
        )
        return {name: getattr(self, name) for name in ordered_names}

    # For sklearn
    def set_params(self, **parameters):
        """Assign each keyword as an attribute, re-sync the extension object,
        and return ``self``."""
        for parameter in parameters:
            setattr(self, parameter, parameters[parameter])
        self.__set_params_cpp()
        return self

    # For sklearn
    def predict_proba(self, X: FloatMatrix) -> FloatMatrix:
        """scikit-learn style alias for ``predict_class_probabilities(X)``."""
        return self.predict_class_probabilities(X)
|
|
542
|
+
|
|
543
|
+
|
|
544
|
+
class APLRTuner:
    """Exhaustive grid search over APLR hyperparameters.

    One model is fitted per entry of ``parameter_grid``; models are ranked by
    the value their own ``get_cv_error()`` returns (lower is better).

    ``parameters`` is either a single search space (a dict mapping a
    hyperparameter name to the list of values to try) or a sequence of such
    dicts. For a sequence, each dict is expanded into its own Cartesian
    product and the resulting grids are concatenated in order.
    """

    # NOTE: the default search space below is a shared dict that is stored on
    # the instance as-is. This class never mutates it; callers should not
    # mutate ``tuner.parameters`` in place either.
    def __init__(
        self,
        parameters: Union[Dict[str, List[float]], List[Dict[str, List[float]]]] = {
            "max_interaction_level": [0, 1],
            "min_observations_in_split": [4, 10, 20, 100, 500, 1000],
        },
        is_regressor: bool = True,
    ):
        """Store the search space and expand it into ``parameter_grid``.

        Args:
            parameters: one search space or a sequence of search spaces.
            is_regressor: truthy to tune ``APLRRegressor`` models, falsy to
                tune ``APLRClassifier`` models.
        """
        self.parameters = parameters
        self.is_regressor = is_regressor
        self.parameter_grid = self._create_parameter_grid()

    def _create_parameter_grid(self) -> List[Dict[str, float]]:
        """Expand ``self.parameters`` into a flat list of keyword dicts."""
        # Anything exposing ``items`` is one search space; otherwise it is
        # treated as an iterable of search spaces, as the signature advertises.
        if hasattr(self.parameters, "items"):
            search_spaces = [self.parameters]
        else:
            search_spaces = list(self.parameters)

        grid: List[Dict[str, float]] = []
        for search_space in search_spaces:
            grid.extend(self._expand_search_space(search_space))
        return grid

    @staticmethod
    def _expand_search_space(search_space) -> List[Dict[str, float]]:
        """Return the Cartesian product of one search space, keys sorted by name."""
        items = sorted(search_space.items())
        if not items:
            # zip(*[]) cannot be unpacked into (keys, values); an empty search
            # space means exactly one candidate that uses all default settings.
            return [{}]
        keys, values = zip(*items)
        return [
            dict(zip(keys, combination)) for combination in itertools.product(*values)
        ]

    def fit(self, X: FloatMatrix, y: FloatVector, **kwargs):
        """Fit one model per grid entry and keep the one with the lowest CV error.

        ``kwargs`` are forwarded unchanged to each model's ``fit``. Afterwards
        ``cv_results`` holds one dict per model (its ``get_params()`` plus a
        ``"cv_error"`` key) sorted by ascending ``"cv_error"``, and
        ``best_model`` is the first model that achieved the lowest error.
        ``best_model`` is only assigned when some model's error is below
        infinity.
        """
        # The model type does not change between grid entries.
        model_class = APLRRegressor if self.is_regressor else APLRClassifier

        self.cv_results: List[Dict[str, float]] = []
        best_validation_result = np.inf
        for params in self.parameter_grid:
            model = model_class(**params)
            model.fit(X, y, **kwargs)
            cv_error_for_this_model = model.get_cv_error()
            cv_results_for_this_model = model.get_params()
            cv_results_for_this_model["cv_error"] = cv_error_for_this_model
            self.cv_results.append(cv_results_for_this_model)
            # Strict comparison: on ties the earlier grid entry is kept.
            if cv_error_for_this_model < best_validation_result:
                best_validation_result = cv_error_for_this_model
                self.best_model = model
        self.cv_results = sorted(self.cv_results, key=lambda x: x["cv_error"])

    def predict(self, X: FloatMatrix, **kwargs) -> Union[FloatVector, List[str]]:
        """Return ``best_model.predict(X, **kwargs)``; requires a prior ``fit``."""
        return self.best_model.predict(X, **kwargs)

    def predict_class_probabilities(self, X: FloatMatrix, **kwargs) -> FloatMatrix:
        """Return the best classifier's class probabilities for ``X``.

        Raises:
            TypeError: if this tuner was configured with a truthy
                ``is_regressor``.
        """
        # Use truthiness, exactly as fit() does when it picks the model class,
        # so both methods always agree on whether this is a classifier.
        if self.is_regressor:
            raise TypeError(
                "predict_class_probabilities is only possible when is_regressor is False"
            )
        return self.best_model.predict_class_probabilities(X, **kwargs)

    def predict_proba(self, X: FloatMatrix, **kwargs) -> FloatMatrix:
        """scikit-learn style alias for ``predict_class_probabilities``."""
        return self.predict_class_probabilities(X, **kwargs)

    def get_best_estimator(self) -> Union["APLRClassifier", "APLRRegressor"]:
        """Return the model selected by the last ``fit``."""
        return self.best_model

    def get_cv_results(self) -> List[Dict[str, float]]:
        """Return the per-model results of the last ``fit``, best first."""
        return self.cv_results
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2022 Mathias von Ottenbreit <ottenbreitdatascience@gmail.com>
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: aplr
|
|
3
|
+
Version: 10.8.0
|
|
4
|
+
Summary: Automatic Piecewise Linear Regression
|
|
5
|
+
Home-page: https://github.com/ottenbreit-data-science/aplr
|
|
6
|
+
Author: Mathias von Ottenbreit
|
|
7
|
+
Author-email: ottenbreitdatascience@gmail.com
|
|
8
|
+
License: MIT
|
|
9
|
+
Platform: Windows
|
|
10
|
+
Platform: Linux
|
|
11
|
+
Platform: MacOS
|
|
12
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
13
|
+
Requires-Python: >=3.8
|
|
14
|
+
Description-Content-Type: text/markdown
|
|
15
|
+
License-File: LICENSE
|
|
16
|
+
Requires-Dist: numpy>=1.11
|
|
17
|
+
|
|
18
|
+
# APLR
|
|
19
|
+
**Automatic Piecewise Linear Regression**
|
|
20
|
+
|
|
21
|
+
## About
|
|
22
|
+
APLR allows you to build predictive and interpretable regression or classification machine learning models in Python, using the Automatic Piecewise Linear Regression (APLR) methodology developed by Mathias von Ottenbreit. APLR often rivals tree-based methods in predictive accuracy, while offering smoother, more interpretable predictions.
|
|
23
|
+
|
|
24
|
+
For further details, see the [documentation](https://github.com/ottenbreit-data-science/aplr/tree/main/documentation). You may also read the published article for additional insights: [Link 1](https://link.springer.com/article/10.1007/s00180-024-01475-4) and [Link 2](https://rdcu.be/dz7bF). Additional functionality has been added since the article was published.
|
|
25
|
+
|
|
26
|
+
## Installation
|
|
27
|
+
To install APLR, use the following command:
|
|
28
|
+
|
|
29
|
+
```bash
|
|
30
|
+
pip install aplr
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
## Availability
|
|
34
|
+
APLR is available for Windows, most Linux distributions, and macOS.
|
|
35
|
+
|
|
36
|
+
## Usage
|
|
37
|
+
Example Python scripts are available [here](https://github.com/ottenbreit-data-science/aplr/tree/main/examples).
|
|
38
|
+
|
|
39
|
+
## Sponsorship
|
|
40
|
+
Consider sponsoring Von Ottenbreit Data Science by clicking the **Sponsor** button on the repository. Sufficient funding will help maintain and further develop APLR.
|
|
41
|
+
|
|
42
|
+
## API Reference
|
|
43
|
+
- [API reference for regression](https://github.com/ottenbreit-data-science/aplr/blob/main/API_REFERENCE_FOR_REGRESSION.md)
|
|
44
|
+
- [API reference for classification](https://github.com/ottenbreit-data-science/aplr/blob/main/API_REFERENCE_FOR_CLASSIFICATION.md)
|
|
45
|
+
|
|
46
|
+
## Contact Information
|
|
47
|
+
For inquiries, please email: [ottenbreitdatascience@gmail.com](mailto:ottenbreitdatascience@gmail.com)
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
aplr_cpp.cpython-38-darwin.so,sha256=PaUtNOZFrOtDFUa_cNr3w2VGiGY2S_00SxmN_eN-mb4,1245728
|
|
2
|
+
aplr-10.8.0.dist-info/RECORD,,
|
|
3
|
+
aplr-10.8.0.dist-info/LICENSE,sha256=g4qcQtkSVPHtGRi3T93DoFCrssvW6ij_emU-2fj_xfY,1113
|
|
4
|
+
aplr-10.8.0.dist-info/WHEEL,sha256=BmLHWpvgEGxzQ0a-0fMfPaUmirqQ6nY7ysk8YssV2bA,108
|
|
5
|
+
aplr-10.8.0.dist-info/top_level.txt,sha256=DXVC0RIFGpzVnPeKWAZTXQdJheOEZL51Wip6Fx7zbR4,14
|
|
6
|
+
aplr-10.8.0.dist-info/METADATA,sha256=Yyv9wyoe4hFRpGsoMQGQsCROdf4-J_D7WEI4fdMG4D4,2107
|
|
7
|
+
aplr/__init__.py,sha256=rRfTgNWnYZlFatyA920lWqBcjwmQUI7FcvEPFUTJgzE,20
|
|
8
|
+
aplr/aplr.py,sha256=moW3FR738q7lCxl_xgzXwoPFMYp2x__aNPOfu5-AMV8,26323
|
|
Binary file
|