aplr 10.19.2__cp313-cp313-macosx_11_0_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
aplr/__init__.py ADDED
@@ -0,0 +1 @@
1
+ from .aplr import *
aplr/aplr.py ADDED
@@ -0,0 +1,1127 @@
1
+ from typing import List, Callable, Optional, Dict, Union, Tuple
2
+ import numpy as np
3
+ import pandas as pd
4
+ import aplr_cpp
5
+ import itertools
6
+
7
# Readability aliases used in the type hints below. All four names are bound
# to the very same ``np.ndarray`` class; the vector/matrix and float/int
# distinction exists for the reader only and is not checked at runtime.
FloatVector = FloatMatrix = IntVector = IntMatrix = np.ndarray
11
+
12
+
13
class BaseAPLR:
    """Input preprocessing shared by the APLR estimators.

    The preprocessor is learned in ``_fit_preprocessor`` and replayed in
    ``_transform_X``. It one-hot encodes ``category``/``object`` columns,
    appends ``<col>_missing`` indicator columns for columns that contained
    missing values during fit, imputes missing numeric values with the
    (optionally weighted) median learned during fit, and finally enforces the
    column set and order seen during fit.

    State written by fitting: ``X_names_``, ``categorical_features_``,
    ``ohe_columns_``, ``na_imputed_cols_``, ``median_values_`` and
    ``final_training_columns_``.
    """

    def _preprocess_X_fit(
        self,
        X: "Union[pd.DataFrame, FloatMatrix]",
        X_names: List[str],
        sample_weight: "FloatVector",
    ) -> "Tuple[FloatMatrix, List[str]]":
        """Validates ``sample_weight``, fits the preprocessor and transforms ``X``.

        :param X: Training predictors (DataFrame or numeric array).
        :param X_names: Column names to use when ``X`` is not a DataFrame.
        :param sample_weight: Observation weights; an empty array means unweighted.
        :return: The transformed predictors as a float64 array and their column names.
        :raises ValueError: If ``sample_weight`` is not 1D, has the wrong length,
            or contains nan, infinite or negative values.
        """
        if sample_weight.size > 0:
            if sample_weight.ndim != 1:
                raise ValueError("sample_weight must be a 1D array.")
            if len(sample_weight) != X.shape[0]:
                raise ValueError(
                    "sample_weight must have the same number of rows as X."
                )
            if np.any(np.isnan(sample_weight)) or np.any(np.isinf(sample_weight)):
                raise ValueError("sample_weight cannot contain nan or infinite values.")
            if np.any(sample_weight < 0):
                raise ValueError("sample_weight cannot contain negative values.")

        self._fit_preprocessor(X, X_names, sample_weight)

        X = self._transform_X(X)

        return X.to_numpy(dtype=np.float64), list(X.columns)

    def _preprocess_X_predict(
        self, X: "Union[pd.DataFrame, FloatMatrix]"
    ) -> "FloatMatrix":
        """Applies the fitted preprocessor to ``X`` and returns a float64 array."""
        X = self._transform_X(X)
        return X.to_numpy(dtype=np.float64)

    def _fit_preprocessor(
        self,
        X: "Union[pd.DataFrame, FloatMatrix]",
        X_names: List[str],
        sample_weight: "FloatVector",
    ) -> None:
        """Learns transformations from the training data and sets preprocessor state."""
        X = self._convert_input_to_dataframe_for_fit(X, X_names=X_names)
        self.X_names_ = list(X.columns)
        self.categorical_features_ = list(
            X.select_dtypes(include=["category", "object"]).columns
        )

        self._fit_one_hot_encoding(X)
        self._fit_missing_indicators(X)

        # Learn median values for imputation from the original data.
        self.median_values_ = {}
        numeric_cols_for_median = [
            col for col in X.columns if col not in self.categorical_features_
        ]
        for col in numeric_cols_for_median:
            missing_mask = X[col].isnull()
            if sample_weight.size > 0:
                # Plain boolean array so it can index both the DataFrame and
                # the numpy weight vector positionally.
                valid_mask = (~missing_mask).to_numpy()
                col_data = X.loc[valid_mask, col]
                col_weights = sample_weight[valid_mask]
                if col_data.empty:
                    median_val = 0
                else:
                    # Weighted median: first sorted value whose cumulative
                    # weight reaches half of the total weight.
                    col_data_np = col_data.to_numpy()
                    sort_indices = np.argsort(col_data_np, kind="stable")
                    sorted_data = col_data_np[sort_indices]
                    sorted_weights = col_weights[sort_indices]
                    cumulative_weights = np.cumsum(sorted_weights)
                    total_weight = cumulative_weights[-1]
                    median_weight_index = np.searchsorted(
                        cumulative_weights, total_weight / 2.0
                    )
                    if median_weight_index >= len(sorted_data):
                        median_weight_index = len(sorted_data) - 1
                    median_val = sorted_data[median_weight_index]
            else:
                if missing_mask.all():
                    median_val = 0
                else:
                    median_val = X[col].median()

            if pd.isna(median_val):
                median_val = 0
            self.median_values_[col] = median_val

        # Determine the final column names after all transformations.
        final_cols = []
        if self.ohe_columns_:
            final_cols.extend(self.ohe_columns_)
        else:
            final_cols.extend(self.X_names_)
        final_cols.extend([col + "_missing" for col in self.na_imputed_cols_])
        self.final_training_columns_ = final_cols

    def _fit_one_hot_encoding(self, X: pd.DataFrame) -> None:
        """Learns the complete set of columns that will exist after one-hot encoding."""
        if not self.categorical_features_:
            # Reset explicitly: a previous fit on data with categorical columns
            # must not leak its one-hot columns into this fit.
            self.ohe_columns_ = []
            return
        self.ohe_columns_ = list(
            pd.get_dummies(
                X, columns=self.categorical_features_, dummy_na=False
            ).columns
        )

    def _fit_missing_indicators(self, X: pd.DataFrame) -> None:
        """Learns which columns will have missing indicators added."""
        self.na_imputed_cols_ = [col for col in X.columns if X[col].isnull().any()]

    def _transform_X(self, X: "Union[pd.DataFrame, FloatMatrix]") -> pd.DataFrame:
        """Transforms data using the fitted preprocessor attributes.

        The caller's data is never modified: a copy is taken before any
        in-place change when the frame may still be (or share memory with)
        the object that was passed in.
        """
        X = self._convert_input_to_dataframe_for_transform(X)
        X = self._transform_one_hot_encoding(X)

        # One-hot encoding returns a fresh frame. Without it, X can still be
        # the caller's DataFrame or a zero-copy wrapper around their array.
        may_alias_input = not self.categorical_features_

        # Indicators and imputation are no-ops without missing values, so they
        # are skipped entirely in that case (which also avoids reassigning
        # columns on the caller's frame).
        if X.isnull().to_numpy().any():
            if may_alias_input:
                X = X.copy()
                may_alias_input = False

            X = self._transform_missing_indicators(X)

            for col, val in self.median_values_.items():
                if col in X.columns:
                    X[col] = X[col].fillna(val)

        # Enforce final column order and add missing columns if necessary
        if self.final_training_columns_:
            missing_final_cols = [
                c for c in self.final_training_columns_ if c not in X.columns
            ]
            if missing_final_cols:
                if may_alias_input:
                    # Only the set of columns changes here, so a shallow copy
                    # is enough to leave the caller's frame untouched.
                    X = X.copy(deep=False)
                for c in missing_final_cols:
                    X[c] = 0
            if not X.columns.equals(pd.Index(self.final_training_columns_)):
                X = X.reindex(columns=self.final_training_columns_, copy=False)

        return X

    def _transform_one_hot_encoding(self, X: pd.DataFrame) -> pd.DataFrame:
        """Applies one-hot encoding using learned OHE columns during transformation."""
        if not self.categorical_features_:
            return X

        X = pd.get_dummies(X, columns=self.categorical_features_, dummy_na=False)
        # Handle missing OHE columns (categories not seen in new data)
        missing_cols = [c for c in self.ohe_columns_ if c not in X.columns]
        for c in missing_cols:
            X[c] = 0
        # Ensure column order (this also drops dummies for unseen categories)
        if not X.columns.equals(pd.Index(self.ohe_columns_)):
            X = X.reindex(columns=self.ohe_columns_, copy=False)
        return X

    def _transform_missing_indicators(self, X: pd.DataFrame) -> pd.DataFrame:
        """Adds _missing indicator columns for features with NaNs during transformation.

        Modifies ``X`` in place; the caller is responsible for passing a frame
        that is safe to modify.
        """
        if not self.na_imputed_cols_:
            return X
        # Only add indicators for columns that were imputed during fit and are currently missing data.
        for col in self.na_imputed_cols_:
            if col in X.columns and X[col].isnull().any():
                X[col + "_missing"] = X[col].isnull().astype(int)
        return X

    def _convert_input_to_dataframe_for_fit(
        self,
        X: "Union[pd.DataFrame, FloatMatrix]",
        X_names: Optional[List[str]] = None,
    ) -> pd.DataFrame:
        """Converts input X to a pandas DataFrame for fitting, handling column names.

        ``X_names`` is only used when ``X`` is not already a DataFrame; without
        names the columns are called ``X0``, ``X1``, ...
        """
        X, was_converted = self._to_dataframe(X)
        if was_converted:
            if X_names:
                X.columns = list(X_names)
            else:
                X.columns = [f"X{i}" for i in range(X.shape[1])]
        return X

    def _convert_input_to_dataframe_for_transform(
        self, X: "Union[pd.DataFrame, FloatMatrix]"
    ) -> pd.DataFrame:
        """Converts input X to a pandas DataFrame for transformation, aligning columns.

        :raises ValueError: If the columns of a DataFrame do not match the
            training columns, or if an array has a different number of columns
            than the data the preprocessor was fitted on.
        """
        X, was_converted = self._to_dataframe(X)
        if was_converted:
            # An empty X_names_ means no names were learned (for example a
            # model unpickled from before this preprocessing existed); the
            # array is then passed through unchanged.
            if self.X_names_:
                if len(self.X_names_) != X.shape[1]:
                    # Without this check the unnamed columns would be dropped
                    # by the final reindex and replaced by all-zero features.
                    raise ValueError(
                        f"Input X has {X.shape[1]} columns, but the model was "
                        f"fitted with {len(self.X_names_)} columns."
                    )
                X.columns = self.X_names_  # Use names learned during fit
        else:  # If X was already a DataFrame
            if set(X.columns) != set(self.X_names_):
                raise ValueError(
                    "Input columns for prediction do not match training columns."
                )
            if not X.columns.equals(pd.Index(self.X_names_)):
                X = X.reindex(columns=self.X_names_, copy=False)
        return X

    def _to_dataframe(
        self, X: "Union[pd.DataFrame, FloatMatrix]"
    ) -> Tuple[pd.DataFrame, bool]:
        """Converts input to a pandas DataFrame if it is not already one.

        :return: The DataFrame and a flag that is True when a conversion happened.
        :raises TypeError: If a non-DataFrame input cannot be converted to float64.
        """
        if isinstance(X, pd.DataFrame):
            return X, False  # Was already a DataFrame

        X_numeric: np.ndarray
        try:
            # If X is already a numpy array, astype with copy=False is more efficient.
            # It will only copy if the dtype is different from np.float64.
            if isinstance(X, np.ndarray):
                X_numeric = X.astype(np.float64, copy=False)
            else:
                # For other array-likes (e.g., list of lists), create the array.
                X_numeric = np.array(X, dtype=np.float64)
        except (ValueError, TypeError) as e:
            raise TypeError("Input X must be numeric if not a pandas DataFrame.") from e
        return pd.DataFrame(X_numeric, copy=False), True  # Was converted

    def __setstate__(self, state):
        """Handles unpickling for backward compatibility."""
        self.__dict__.update(state)

        # For backward compatibility, initialize new attributes if they don't exist,
        # indicating the model was trained before these features were introduced.
        new_attributes = {
            "X_names_": [],
            "categorical_features_": [],
            "ohe_columns_": [],
            "na_imputed_cols_": [],
            "median_values_": {},
            "final_training_columns_": [],
        }
        for attr, default_value in new_attributes.items():
            if not hasattr(self, attr):
                setattr(self, attr, default_value)

    def _validate_X_fit_rows(self, X):
        """Checks if X has enough rows to be fitted.

        Only numpy arrays and DataFrames are checked; other inputs pass.

        :raises ValueError: If ``X`` has fewer than 2 rows.
        """
        if (isinstance(X, np.ndarray) and X.shape[0] < 2) or (
            isinstance(X, pd.DataFrame) and len(X) < 2
        ):
            raise ValueError("Input X must have at least 2 rows to be fitted.")
245
+
246
+
247
+ class APLRRegressor(BaseAPLR):
248
+ def __init__(
249
+ self,
250
+ m: int = 3000,
251
+ v: float = 0.5,
252
+ random_state: int = 0,
253
+ loss_function: str = "mse",
254
+ link_function: str = "identity",
255
+ n_jobs: int = 0,
256
+ cv_folds: int = 5,
257
+ bins: int = 300,
258
+ max_interaction_level: int = 1,
259
+ max_interactions: int = 100000,
260
+ min_observations_in_split: int = 4,
261
+ ineligible_boosting_steps_added: int = 15,
262
+ max_eligible_terms: int = 7,
263
+ verbosity: int = 0,
264
+ dispersion_parameter: float = 1.5,
265
+ validation_tuning_metric: str = "default",
266
+ quantile: float = 0.5,
267
+ calculate_custom_validation_error_function: Optional[
268
+ Callable[
269
+ [
270
+ FloatVector,
271
+ FloatVector,
272
+ FloatVector,
273
+ FloatVector,
274
+ FloatMatrix,
275
+ ],
276
+ float,
277
+ ]
278
+ ] = None,
279
+ calculate_custom_loss_function: Optional[
280
+ Callable[
281
+ [
282
+ FloatVector,
283
+ FloatVector,
284
+ FloatVector,
285
+ FloatVector,
286
+ FloatMatrix,
287
+ ],
288
+ float,
289
+ ]
290
+ ] = None,
291
+ calculate_custom_negative_gradient_function: Optional[
292
+ Callable[
293
+ [FloatVector, FloatVector, FloatVector, FloatMatrix],
294
+ FloatVector,
295
+ ]
296
+ ] = None,
297
+ calculate_custom_transform_linear_predictor_to_predictions_function: Optional[
298
+ Callable[[FloatVector], FloatVector]
299
+ ] = None,
300
+ calculate_custom_differentiate_predictions_wrt_linear_predictor_function: Optional[
301
+ Callable[[FloatVector], FloatVector]
302
+ ] = None,
303
+ boosting_steps_before_interactions_are_allowed: int = 0,
304
+ monotonic_constraints_ignore_interactions: bool = False,
305
+ group_mse_by_prediction_bins: int = 10,
306
+ group_mse_cycle_min_obs_in_bin: int = 30,
307
+ early_stopping_rounds: int = 200,
308
+ num_first_steps_with_linear_effects_only: int = 0,
309
+ penalty_for_non_linearity: float = 0.0,
310
+ penalty_for_interactions: float = 0.0,
311
+ max_terms: int = 0,
312
+ ridge_penalty: float = 0.0001,
313
+ mean_bias_correction: bool = False,
314
+ faster_convergence: bool = False,
315
+ ):
316
+ self.m = m
317
+ self.v = v
318
+ self.random_state = random_state
319
+ self.loss_function = loss_function
320
+ self.link_function = link_function
321
+ self.n_jobs = n_jobs
322
+ self.cv_folds = cv_folds
323
+ self.bins = bins
324
+ self.max_interaction_level = max_interaction_level
325
+ self.max_interactions = max_interactions
326
+ self.min_observations_in_split = min_observations_in_split
327
+ self.ineligible_boosting_steps_added = ineligible_boosting_steps_added
328
+ self.max_eligible_terms = max_eligible_terms
329
+ self.verbosity = verbosity
330
+ self.dispersion_parameter = dispersion_parameter
331
+ self.validation_tuning_metric = validation_tuning_metric
332
+ self.quantile = quantile
333
+ self.calculate_custom_validation_error_function = (
334
+ calculate_custom_validation_error_function
335
+ )
336
+ self.calculate_custom_loss_function = calculate_custom_loss_function
337
+ self.calculate_custom_negative_gradient_function = (
338
+ calculate_custom_negative_gradient_function
339
+ )
340
+ self.calculate_custom_transform_linear_predictor_to_predictions_function = (
341
+ calculate_custom_transform_linear_predictor_to_predictions_function
342
+ )
343
+ self.calculate_custom_differentiate_predictions_wrt_linear_predictor_function = (
344
+ calculate_custom_differentiate_predictions_wrt_linear_predictor_function
345
+ )
346
+ self.boosting_steps_before_interactions_are_allowed = (
347
+ boosting_steps_before_interactions_are_allowed
348
+ )
349
+ self.monotonic_constraints_ignore_interactions = (
350
+ monotonic_constraints_ignore_interactions
351
+ )
352
+ self.group_mse_by_prediction_bins = group_mse_by_prediction_bins
353
+ self.group_mse_cycle_min_obs_in_bin = group_mse_cycle_min_obs_in_bin
354
+ self.early_stopping_rounds = early_stopping_rounds
355
+ self.num_first_steps_with_linear_effects_only = (
356
+ num_first_steps_with_linear_effects_only
357
+ )
358
+ self.penalty_for_non_linearity = penalty_for_non_linearity
359
+ self.penalty_for_interactions = penalty_for_interactions
360
+ self.max_terms = max_terms
361
+ self.ridge_penalty = ridge_penalty
362
+ self.mean_bias_correction = mean_bias_correction
363
+ self.faster_convergence = faster_convergence
364
+
365
+ # Data transformations
366
+ self.median_values_ = {}
367
+ self.categorical_features_ = []
368
+ self.ohe_columns_ = []
369
+ self.na_imputed_cols_ = []
370
+ self.X_names_ = []
371
+ self.final_training_columns_ = []
372
+
373
+ # Creating aplr_cpp and setting parameters
374
+ self.APLRRegressor = aplr_cpp.APLRRegressor()
375
+ self.__set_params_cpp()
376
+
377
+ # Sets parameters for aplr_cpp.APLRRegressor cpp object
378
+ def __set_params_cpp(self):
379
+ self.APLRRegressor.m = self.m
380
+ self.APLRRegressor.v = self.v
381
+ self.APLRRegressor.random_state = self.random_state
382
+ self.APLRRegressor.loss_function = self.loss_function
383
+ self.APLRRegressor.link_function = self.link_function
384
+ self.APLRRegressor.n_jobs = self.n_jobs
385
+ self.APLRRegressor.cv_folds = self.cv_folds
386
+ self.APLRRegressor.bins = self.bins
387
+ self.APLRRegressor.max_interaction_level = self.max_interaction_level
388
+ self.APLRRegressor.max_interactions = self.max_interactions
389
+ self.APLRRegressor.min_observations_in_split = self.min_observations_in_split
390
+ self.APLRRegressor.ineligible_boosting_steps_added = (
391
+ self.ineligible_boosting_steps_added
392
+ )
393
+ self.APLRRegressor.max_eligible_terms = self.max_eligible_terms
394
+ self.APLRRegressor.verbosity = self.verbosity
395
+ self.APLRRegressor.dispersion_parameter = self.dispersion_parameter
396
+ self.APLRRegressor.validation_tuning_metric = self.validation_tuning_metric
397
+ self.APLRRegressor.quantile = self.quantile
398
+ self.APLRRegressor.calculate_custom_validation_error_function = (
399
+ self.calculate_custom_validation_error_function
400
+ )
401
+ self.APLRRegressor.calculate_custom_loss_function = (
402
+ self.calculate_custom_loss_function
403
+ )
404
+ self.APLRRegressor.calculate_custom_negative_gradient_function = (
405
+ self.calculate_custom_negative_gradient_function
406
+ )
407
+ self.APLRRegressor.calculate_custom_transform_linear_predictor_to_predictions_function = (
408
+ self.calculate_custom_transform_linear_predictor_to_predictions_function
409
+ )
410
+ self.APLRRegressor.calculate_custom_differentiate_predictions_wrt_linear_predictor_function = (
411
+ self.calculate_custom_differentiate_predictions_wrt_linear_predictor_function
412
+ )
413
+ self.APLRRegressor.boosting_steps_before_interactions_are_allowed = (
414
+ self.boosting_steps_before_interactions_are_allowed
415
+ )
416
+ self.APLRRegressor.monotonic_constraints_ignore_interactions = (
417
+ self.monotonic_constraints_ignore_interactions
418
+ )
419
+ self.APLRRegressor.group_mse_by_prediction_bins = (
420
+ self.group_mse_by_prediction_bins
421
+ )
422
+ self.APLRRegressor.group_mse_cycle_min_obs_in_bin = (
423
+ self.group_mse_cycle_min_obs_in_bin
424
+ )
425
+ self.APLRRegressor.early_stopping_rounds = self.early_stopping_rounds
426
+ self.APLRRegressor.num_first_steps_with_linear_effects_only = (
427
+ self.num_first_steps_with_linear_effects_only
428
+ )
429
+ self.APLRRegressor.penalty_for_non_linearity = self.penalty_for_non_linearity
430
+ self.APLRRegressor.penalty_for_interactions = self.penalty_for_interactions
431
+ self.APLRRegressor.max_terms = self.max_terms
432
+ self.APLRRegressor.ridge_penalty = self.ridge_penalty
433
+ self.APLRRegressor.mean_bias_correction = self.mean_bias_correction
434
+ self.APLRRegressor.faster_convergence = self.faster_convergence
435
+
436
+ def fit(
437
+ self,
438
+ X: Union[pd.DataFrame, FloatMatrix],
439
+ y: FloatVector,
440
+ sample_weight: FloatVector = np.empty(0),
441
+ X_names: List[str] = [],
442
+ cv_observations: IntMatrix = np.empty([0, 0]),
443
+ prioritized_predictors_indexes: List[int] = [],
444
+ monotonic_constraints: List[int] = [],
445
+ group: FloatVector = np.empty(0),
446
+ interaction_constraints: List[List[int]] = [],
447
+ other_data: FloatMatrix = np.empty([0, 0]),
448
+ predictor_learning_rates: List[float] = [],
449
+ predictor_penalties_for_non_linearity: List[float] = [],
450
+ predictor_penalties_for_interactions: List[float] = [],
451
+ predictor_min_observations_in_split: List[int] = [],
452
+ ):
453
+ self._validate_X_fit_rows(X)
454
+ self.__set_params_cpp()
455
+ X_transformed, X_names_transformed = self._preprocess_X_fit(
456
+ X, X_names, sample_weight
457
+ )
458
+ self.APLRRegressor.fit(
459
+ X_transformed,
460
+ y,
461
+ sample_weight,
462
+ X_names_transformed,
463
+ cv_observations,
464
+ prioritized_predictors_indexes,
465
+ monotonic_constraints,
466
+ group,
467
+ interaction_constraints,
468
+ other_data,
469
+ predictor_learning_rates,
470
+ predictor_penalties_for_non_linearity,
471
+ predictor_penalties_for_interactions,
472
+ predictor_min_observations_in_split,
473
+ )
474
+
475
+ def predict(
476
+ self,
477
+ X: Union[pd.DataFrame, FloatMatrix],
478
+ cap_predictions_to_minmax_in_training: bool = True,
479
+ ) -> FloatVector:
480
+ if self.link_function == "custom_function":
481
+ self.APLRRegressor.calculate_custom_transform_linear_predictor_to_predictions_function = (
482
+ self.calculate_custom_transform_linear_predictor_to_predictions_function
483
+ )
484
+ X_transformed = self._preprocess_X_predict(X)
485
+ return self.APLRRegressor.predict(
486
+ X_transformed, cap_predictions_to_minmax_in_training
487
+ )
488
+
489
+ def set_term_names(self, X_names: List[str]):
490
+ self.APLRRegressor.set_term_names(X_names)
491
+
492
+ def calculate_feature_importance(
493
+ self,
494
+ X: Union[pd.DataFrame, FloatMatrix],
495
+ sample_weight: FloatVector = np.empty(0),
496
+ ) -> FloatVector:
497
+ X_transformed = self._preprocess_X_predict(X)
498
+ return self.APLRRegressor.calculate_feature_importance(
499
+ X_transformed, sample_weight
500
+ )
501
+
502
+ def calculate_term_importance(
503
+ self,
504
+ X: Union[pd.DataFrame, FloatMatrix],
505
+ sample_weight: FloatVector = np.empty(0),
506
+ ) -> FloatVector:
507
+ X_transformed = self._preprocess_X_predict(X)
508
+ return self.APLRRegressor.calculate_term_importance(
509
+ X_transformed, sample_weight
510
+ )
511
+
512
+ def calculate_local_feature_contribution(
513
+ self, X: Union[pd.DataFrame, FloatMatrix]
514
+ ) -> FloatMatrix:
515
+ X_transformed = self._preprocess_X_predict(X)
516
+ return self.APLRRegressor.calculate_local_feature_contribution(X_transformed)
517
+
518
+ def calculate_local_term_contribution(
519
+ self, X: Union[pd.DataFrame, FloatMatrix]
520
+ ) -> FloatMatrix:
521
+ X_transformed = self._preprocess_X_predict(X)
522
+ return self.APLRRegressor.calculate_local_term_contribution(X_transformed)
523
+
524
+ def calculate_local_contribution_from_selected_terms(
525
+ self, X: Union[pd.DataFrame, FloatMatrix], predictor_indexes: List[int]
526
+ ) -> FloatVector:
527
+ X_transformed = self._preprocess_X_predict(X)
528
+ return self.APLRRegressor.calculate_local_contribution_from_selected_terms(
529
+ X_transformed, predictor_indexes
530
+ )
531
+
532
+ def calculate_terms(self, X: Union[pd.DataFrame, FloatMatrix]) -> FloatMatrix:
533
+ X_transformed = self._preprocess_X_predict(X)
534
+ return self.APLRRegressor.calculate_terms(X_transformed)
535
+
536
+ def get_term_names(self) -> List[str]:
537
+ return self.APLRRegressor.get_term_names()
538
+
539
+ def get_term_affiliations(self) -> List[str]:
540
+ return self.APLRRegressor.get_term_affiliations()
541
+
542
+ def get_unique_term_affiliations(self) -> List[str]:
543
+ return self.APLRRegressor.get_unique_term_affiliations()
544
+
545
+ def get_base_predictors_in_each_unique_term_affiliation(self) -> List[List[int]]:
546
+ return self.APLRRegressor.get_base_predictors_in_each_unique_term_affiliation()
547
+
548
+ def get_term_coefficients(self) -> FloatVector:
549
+ return self.APLRRegressor.get_term_coefficients()
550
+
551
+ def get_validation_error_steps(self) -> FloatMatrix:
552
+ return self.APLRRegressor.get_validation_error_steps()
553
+
554
+ def get_feature_importance(self) -> FloatVector:
555
+ return self.APLRRegressor.get_feature_importance()
556
+
557
+ def get_term_importance(self) -> FloatVector:
558
+ return self.APLRRegressor.get_term_importance()
559
+
560
+ def get_term_main_predictor_indexes(self) -> IntVector:
561
+ return self.APLRRegressor.get_term_main_predictor_indexes()
562
+
563
+ def get_term_interaction_levels(self) -> IntVector:
564
+ return self.APLRRegressor.get_term_interaction_levels()
565
+
566
+ def get_intercept(self) -> float:
567
+ return self.APLRRegressor.get_intercept()
568
+
569
+ def get_optimal_m(self) -> int:
570
+ return self.APLRRegressor.get_optimal_m()
571
+
572
+ def get_validation_tuning_metric(self) -> str:
573
+ return self.APLRRegressor.get_validation_tuning_metric()
574
+
575
+ def get_main_effect_shape(self, predictor_index: int) -> Dict[float, float]:
576
+ return self.APLRRegressor.get_main_effect_shape(predictor_index)
577
+
578
+ def get_unique_term_affiliation_shape(
579
+ self,
580
+ unique_term_affiliation: str,
581
+ max_rows_before_sampling: int = 500000,
582
+ additional_points: int = 250,
583
+ ) -> FloatMatrix:
584
+ return self.APLRRegressor.get_unique_term_affiliation_shape(
585
+ unique_term_affiliation, max_rows_before_sampling, additional_points
586
+ )
587
+
588
+ def get_cv_error(self) -> float:
589
+ return self.APLRRegressor.get_cv_error()
590
+
591
+ def get_num_cv_folds(self) -> int:
592
+ """
593
+ Gets the number of cross-validation folds used during training.
594
+
595
+ :return: The number of folds.
596
+ """
597
+ return self.APLRRegressor.get_num_cv_folds()
598
+
599
+ def get_cv_validation_predictions(self, fold_index: int) -> FloatVector:
600
+ """
601
+ Gets the validation predictions for a specific cross-validation fold.
602
+
603
+ Note that these predictions may be conservative, as the final model is an ensemble of the models
604
+ from all cross-validation folds, which has a variance-reducing effect similar to bagging.
605
+
606
+ :param fold_index: The index of the fold.
607
+ :return: A numpy array containing the validation predictions.
608
+ """
609
+ return self.APLRRegressor.get_cv_validation_predictions(fold_index)
610
+
611
+ def get_cv_y(self, fold_index: int) -> FloatVector:
612
+ """
613
+ Gets the validation response values (y) for a specific cross-validation fold.
614
+
615
+ :param fold_index: The index of the fold.
616
+ :return: A numpy array containing the validation response values.
617
+ """
618
+ return self.APLRRegressor.get_cv_y(fold_index)
619
+
620
+ def get_cv_sample_weight(self, fold_index: int) -> FloatVector:
621
+ """
622
+ Gets the validation sample weights for a specific cross-validation fold.
623
+
624
+ :param fold_index: The index of the fold.
625
+ :return: A numpy array containing the validation sample weights.
626
+ """
627
+ return self.APLRRegressor.get_cv_sample_weight(fold_index)
628
+
629
+ def get_cv_validation_indexes(self, fold_index: int) -> IntVector:
630
+ """
631
+ Gets the original indexes of the validation observations for a specific cross-validation fold.
632
+
633
+ :param fold_index: The index of the fold.
634
+ :return: A numpy array containing the original indexes.
635
+ """
636
+ return self.APLRRegressor.get_cv_validation_indexes(fold_index)
637
+
638
+ def set_intercept(self, value: float):
639
+ self.APLRRegressor.set_intercept(value)
640
+
641
+ def plot_affiliation_shape(
642
+ self,
643
+ affiliation: str,
644
+ plot: bool = True,
645
+ save: bool = False,
646
+ path: str = "",
647
+ ):
648
+ """
649
+ Plots or saves the shape of a given unique term affiliation.
650
+
651
+ For main effects, it produces a line plot. For two-way interactions, it produces a heatmap.
652
+ Plotting for higher-order interactions is not supported.
653
+
654
+ :param affiliation: A string specifying which unique_term_affiliation to use.
655
+ :param plot: If True, displays the plot.
656
+ :param save: If True, saves the plot to a file.
657
+ :param path: The file path to save the plot. If empty and save is True, a default path will be used.
658
+ """
659
+ try:
660
+ import matplotlib.pyplot as plt
661
+ except ImportError:
662
+ raise ImportError("matplotlib is required for plotting. Please install it.")
663
+
664
+ all_affiliations = self.get_unique_term_affiliations()
665
+ if affiliation not in all_affiliations:
666
+ raise ValueError(
667
+ f"Affiliation '{affiliation}' not found in model. "
668
+ f"Available affiliations are: {all_affiliations}"
669
+ )
670
+
671
+ affiliation_index = all_affiliations.index(affiliation)
672
+
673
+ predictors_in_each_affiliation = (
674
+ self.get_base_predictors_in_each_unique_term_affiliation()
675
+ )
676
+ predictor_indexes_used = predictors_in_each_affiliation[affiliation_index]
677
+
678
+ shape = self.get_unique_term_affiliation_shape(affiliation)
679
+ if shape.shape[0] == 0:
680
+ print(f"No shape data available for affiliation '{affiliation}'.")
681
+ return
682
+
683
+ predictor_names = affiliation.split(" & ")
684
+
685
+ is_main_effect: bool = len(predictor_indexes_used) == 1
686
+ is_two_way_interaction: bool = len(predictor_indexes_used) == 2
687
+
688
+ if is_main_effect:
689
+ fig = plt.figure()
690
+ # Sort by predictor value for a clean line plot
691
+ sorted_indices = np.argsort(shape[:, 0])
692
+ plt.plot(shape[sorted_indices, 0], shape[sorted_indices, 1])
693
+ plt.xlabel(predictor_names[0])
694
+ plt.ylabel("Contribution to linear predictor")
695
+ plt.title(f"Main effect of {predictor_names[0]}")
696
+ plt.grid(True)
697
+ elif is_two_way_interaction:
698
+ fig = plt.figure(figsize=(8, 6))
699
+
700
+ # Get unique coordinates and their inverse mapping
701
+ y_unique, y_inv = np.unique(shape[:, 0], return_inverse=True)
702
+ x_unique, x_inv = np.unique(shape[:, 1], return_inverse=True)
703
+
704
+ # Create grid for sums and counts
705
+ grid_sums = np.zeros((len(y_unique), len(x_unique)))
706
+ grid_counts = np.zeros((len(y_unique), len(x_unique)))
707
+
708
+ # Populate sums and counts to later calculate the mean
709
+ np.add.at(grid_sums, (y_inv, x_inv), shape[:, 2])
710
+ np.add.at(grid_counts, (y_inv, x_inv), 1)
711
+
712
+ # Calculate mean, avoiding division by zero
713
+ with np.errstate(divide="ignore", invalid="ignore"):
714
+ pivot_table_values = np.true_divide(grid_sums, grid_counts)
715
+ # Where there's no data, pivot_table_values will be nan, which is fine for imshow.
716
+
717
+ plt.imshow(
718
+ pivot_table_values,
719
+ aspect="auto",
720
+ origin="lower",
721
+ extent=[
722
+ x_unique.min(),
723
+ x_unique.max(),
724
+ y_unique.min(),
725
+ y_unique.max(),
726
+ ],
727
+ cmap="Blues_r",
728
+ )
729
+ plt.colorbar(label="Contribution to the linear predictor")
730
+ plt.xlabel(predictor_names[1])
731
+ plt.ylabel(predictor_names[0])
732
+ plt.title(
733
+ f"Interaction between {predictor_names[0]} and {predictor_names[1]}"
734
+ )
735
+ else:
736
+ print(
737
+ f"Plotting for interaction level > 2 is not supported. Affiliation: {affiliation}"
738
+ )
739
+ return
740
+
741
+ if save:
742
+ save_path = path or f"shape_of_{affiliation.replace(' & ', '_')}.png"
743
+ plt.savefig(save_path)
744
+
745
+ if plot:
746
+ plt.show()
747
+
748
+ plt.close(fig)
749
+
750
+ def remove_provided_custom_functions(self):
751
+ self.APLRRegressor.remove_provided_custom_functions()
752
+ self.calculate_custom_validation_error_function = None
753
+ self.calculate_custom_loss_function = None
754
+ self.calculate_custom_negative_gradient_function = None
755
+
756
+ def clear_cv_results(self):
757
+ """
758
+ Clears the stored cross-validation results (predictions, y, etc.) to free up memory.
759
+ """
760
+ self.APLRRegressor.clear_cv_results()
761
+
762
+ # For sklearn
763
+ def get_params(self, deep=True):
764
+ return {
765
+ "m": self.m,
766
+ "v": self.v,
767
+ "random_state": self.random_state,
768
+ "loss_function": self.loss_function,
769
+ "link_function": self.link_function,
770
+ "n_jobs": self.n_jobs,
771
+ "cv_folds": self.cv_folds,
772
+ "bins": self.bins,
773
+ "max_interaction_level": self.max_interaction_level,
774
+ "max_interactions": self.max_interactions,
775
+ "verbosity": self.verbosity,
776
+ "min_observations_in_split": self.min_observations_in_split,
777
+ "ineligible_boosting_steps_added": self.ineligible_boosting_steps_added,
778
+ "max_eligible_terms": self.max_eligible_terms,
779
+ "dispersion_parameter": self.dispersion_parameter,
780
+ "validation_tuning_metric": self.validation_tuning_metric,
781
+ "quantile": self.quantile,
782
+ "calculate_custom_validation_error_function": self.calculate_custom_validation_error_function,
783
+ "calculate_custom_loss_function": self.calculate_custom_loss_function,
784
+ "calculate_custom_negative_gradient_function": self.calculate_custom_negative_gradient_function,
785
+ "calculate_custom_transform_linear_predictor_to_predictions_function": self.calculate_custom_transform_linear_predictor_to_predictions_function,
786
+ "calculate_custom_differentiate_predictions_wrt_linear_predictor_function": self.calculate_custom_differentiate_predictions_wrt_linear_predictor_function,
787
+ "boosting_steps_before_interactions_are_allowed": self.boosting_steps_before_interactions_are_allowed,
788
+ "monotonic_constraints_ignore_interactions": self.monotonic_constraints_ignore_interactions,
789
+ "group_mse_by_prediction_bins": self.group_mse_by_prediction_bins,
790
+ "group_mse_cycle_min_obs_in_bin": self.group_mse_cycle_min_obs_in_bin,
791
+ "early_stopping_rounds": self.early_stopping_rounds,
792
+ "num_first_steps_with_linear_effects_only": self.num_first_steps_with_linear_effects_only,
793
+ "penalty_for_non_linearity": self.penalty_for_non_linearity,
794
+ "penalty_for_interactions": self.penalty_for_interactions,
795
+ "max_terms": self.max_terms,
796
+ "ridge_penalty": self.ridge_penalty,
797
+ "mean_bias_correction": self.mean_bias_correction,
798
+ "faster_convergence": self.faster_convergence,
799
+ }
800
+
801
+ # For sklearn
802
def set_params(self, **parameters):
    """Assign the given hyperparameters and sync them to the C++ object.

    Each keyword argument is stored as an attribute of the same name on this
    estimator; no check is made that the name is a known hyperparameter.
    The stored values are then pushed to the C++ object.

    Returns:
        This estimator, so calls can be chained.
    """
    for name in parameters:
        setattr(self, name, parameters[name])
    self.__set_params_cpp()
    return self
807
+
808
+
809
class APLRClassifier(BaseAPLR):
    """Python front end for the compiled ``aplr_cpp.APLRClassifier``.

    Hyperparameters live as plain attributes on this object and are copied
    onto the C++ object at construction, in ``set_params`` and at the start of
    ``fit``. Feature matrices are passed through the preprocessing helpers
    inherited from ``BaseAPLR`` before they reach the C++ object.
    """

    # Constructor arguments, listed in the order in which they are copied to
    # the C++ object and reported by get_params().
    _CLASSIFIER_HYPERPARAMETERS = (
        "m",
        "v",
        "random_state",
        "n_jobs",
        "cv_folds",
        "bins",
        "verbosity",
        "max_interaction_level",
        "max_interactions",
        "min_observations_in_split",
        "ineligible_boosting_steps_added",
        "max_eligible_terms",
        "boosting_steps_before_interactions_are_allowed",
        "monotonic_constraints_ignore_interactions",
        "early_stopping_rounds",
        "num_first_steps_with_linear_effects_only",
        "penalty_for_non_linearity",
        "penalty_for_interactions",
        "max_terms",
        "ridge_penalty",
    )

    def __init__(
        self,
        m: int = 3000,
        v: float = 0.5,
        random_state: int = 0,
        n_jobs: int = 0,
        cv_folds: int = 5,
        bins: int = 300,
        verbosity: int = 0,
        max_interaction_level: int = 1,
        max_interactions: int = 100000,
        min_observations_in_split: int = 4,
        ineligible_boosting_steps_added: int = 15,
        max_eligible_terms: int = 7,
        boosting_steps_before_interactions_are_allowed: int = 0,
        monotonic_constraints_ignore_interactions: bool = False,
        early_stopping_rounds: int = 200,
        num_first_steps_with_linear_effects_only: int = 0,
        penalty_for_non_linearity: float = 0.0,
        penalty_for_interactions: float = 0.0,
        max_terms: int = 0,
        ridge_penalty: float = 0.0001,
    ):
        """Store the hyperparameters and create the underlying C++ classifier."""
        # Hyperparameters are stored unchanged under their argument names.
        self.m = m
        self.v = v
        self.random_state = random_state
        self.n_jobs = n_jobs
        self.cv_folds = cv_folds
        self.bins = bins
        self.verbosity = verbosity
        self.max_interaction_level = max_interaction_level
        self.max_interactions = max_interactions
        self.min_observations_in_split = min_observations_in_split
        self.ineligible_boosting_steps_added = ineligible_boosting_steps_added
        self.max_eligible_terms = max_eligible_terms
        self.boosting_steps_before_interactions_are_allowed = boosting_steps_before_interactions_are_allowed
        self.monotonic_constraints_ignore_interactions = monotonic_constraints_ignore_interactions
        self.early_stopping_rounds = early_stopping_rounds
        self.num_first_steps_with_linear_effects_only = num_first_steps_with_linear_effects_only
        self.penalty_for_non_linearity = penalty_for_non_linearity
        self.penalty_for_interactions = penalty_for_interactions
        self.max_terms = max_terms
        self.ridge_penalty = ridge_penalty

        # Data transformations (preprocessor state, empty until fitted)
        self.median_values_ = {}
        self.categorical_features_ = []
        self.ohe_columns_ = []
        self.na_imputed_cols_ = []
        self.X_names_ = []
        self.final_training_columns_ = []

        # Creating aplr_cpp and setting parameters
        self.APLRClassifier = aplr_cpp.APLRClassifier()
        self.__set_params_cpp()

    # Sets parameters for aplr_cpp.APLRClassifier cpp object
    def __set_params_cpp(self):
        """Copy every hyperparameter attribute onto the C++ object, in order."""
        cpp_model = self.APLRClassifier
        for name in self._CLASSIFIER_HYPERPARAMETERS:
            setattr(cpp_model, name, getattr(self, name))

    def fit(
        self,
        X: Union[pd.DataFrame, FloatMatrix],
        y: Union[FloatVector, List[str]],
        sample_weight: FloatVector = np.empty(0),
        X_names: List[str] = [],
        cv_observations: IntMatrix = np.empty([0, 0]),
        prioritized_predictors_indexes: List[int] = [],
        monotonic_constraints: List[int] = [],
        interaction_constraints: List[List[int]] = [],
        predictor_learning_rates: List[float] = [],
        predictor_penalties_for_non_linearity: List[float] = [],
        predictor_penalties_for_interactions: List[float] = [],
        predictor_min_observations_in_split: List[int] = [],
    ):
        """Preprocess ``X``, stringify the labels and fit the C++ classifier.

        The current hyperparameter attributes are synced to the C++ object
        before fitting. After fitting, ``classes_`` is set to the integer
        positions ``0 .. k-1``, one per category reported by the C++ model.
        All remaining arguments are forwarded positionally and unchanged.
        """
        self._validate_X_fit_rows(X)
        self.__set_params_cpp()
        design_matrix, column_names = self._preprocess_X_fit(
            X, X_names, sample_weight
        )

        # ndarray labels and non-str list labels are converted to str before
        # being passed on; anything else is forwarded untouched.
        if isinstance(y, np.ndarray):
            y = y.astype(str).tolist()
        elif isinstance(y, list) and y and not isinstance(y[0], str):
            y = list(map(str, y))

        self.APLRClassifier.fit(
            design_matrix,
            y,
            sample_weight,
            column_names,
            cv_observations,
            prioritized_predictors_indexes,
            monotonic_constraints,
            interaction_constraints,
            predictor_learning_rates,
            predictor_penalties_for_non_linearity,
            predictor_penalties_for_interactions,
            predictor_min_observations_in_split,
        )
        # For sklearn
        category_count = len(self.APLRClassifier.get_categories())
        self.classes_ = np.arange(category_count)

    def predict_class_probabilities(
        self,
        X: Union[pd.DataFrame, FloatMatrix],
        cap_predictions_to_minmax_in_training: bool = False,
    ) -> FloatMatrix:
        """Return the C++ model's class probabilities for preprocessed ``X``."""
        design_matrix = self._preprocess_X_predict(X)
        cpp_model = self.APLRClassifier
        return cpp_model.predict_class_probabilities(
            design_matrix, cap_predictions_to_minmax_in_training
        )

    def predict(
        self,
        X: Union[pd.DataFrame, FloatMatrix],
        cap_predictions_to_minmax_in_training: bool = False,
    ) -> List[str]:
        """Return the C++ model's predictions for preprocessed ``X``."""
        design_matrix = self._preprocess_X_predict(X)
        cpp_model = self.APLRClassifier
        return cpp_model.predict(design_matrix, cap_predictions_to_minmax_in_training)

    def calculate_local_feature_contribution(
        self, X: Union[pd.DataFrame, FloatMatrix]
    ) -> FloatMatrix:
        """Preprocess ``X`` and delegate to the C++ method of the same name."""
        design_matrix = self._preprocess_X_predict(X)
        return self.APLRClassifier.calculate_local_feature_contribution(design_matrix)

    def get_categories(self) -> List[str]:
        """Return the categories reported by the C++ model."""
        cpp_model = self.APLRClassifier
        return cpp_model.get_categories()

    def get_logit_model(self, category: str) -> APLRRegressor:
        """Return the logit model for ``category`` wrapped in an ``APLRRegressor``.

        The wrapper is constructed with this classifier's hyperparameters plus
        ``loss_function="binomial"`` and ``link_function="logit"``; its C++
        object is then replaced by the model fetched from the classifier.
        """
        fitted_cpp_model = self.APLRClassifier.get_logit_model(category)

        shared_settings = {
            name: getattr(self, name) for name in self._CLASSIFIER_HYPERPARAMETERS
        }
        wrapper = APLRRegressor(
            loss_function="binomial",
            link_function="logit",
            **shared_settings,
        )
        wrapper.APLRRegressor = fitted_cpp_model
        return wrapper

    def get_validation_error_steps(self) -> FloatMatrix:
        """Delegate to the C++ method of the same name."""
        cpp_model = self.APLRClassifier
        return cpp_model.get_validation_error_steps()

    def get_cv_error(self) -> float:
        """Delegate to the C++ method of the same name."""
        cpp_model = self.APLRClassifier
        return cpp_model.get_cv_error()

    def get_feature_importance(self) -> FloatVector:
        """Delegate to the C++ method of the same name."""
        cpp_model = self.APLRClassifier
        return cpp_model.get_feature_importance()

    def get_unique_term_affiliations(self) -> List[str]:
        """Delegate to the C++ method of the same name."""
        cpp_model = self.APLRClassifier
        return cpp_model.get_unique_term_affiliations()

    def get_base_predictors_in_each_unique_term_affiliation(self) -> List[List[int]]:
        """Delegate to the C++ method of the same name."""
        cpp_model = self.APLRClassifier
        return cpp_model.get_base_predictors_in_each_unique_term_affiliation()

    def clear_cv_results(self):
        """
        Clears the stored cross-validation results from all underlying logit models to free up memory.
        """
        cpp_model = self.APLRClassifier
        cpp_model.clear_cv_results()

    # For sklearn
    def get_params(self, deep=True):
        """Return the hyperparameters as a name -> value dict (``deep`` is unused)."""
        return {name: getattr(self, name) for name in self._CLASSIFIER_HYPERPARAMETERS}

    # For sklearn
    def set_params(self, **parameters):
        """Store each keyword as an attribute, sync to the C++ object, return self."""
        for name in parameters:
            setattr(self, name, parameters[name])
        self.__set_params_cpp()
        return self

    # For sklearn
    def predict_proba(self, X: FloatMatrix) -> FloatMatrix:
        """Alias of ``predict_class_probabilities`` with default capping."""
        return self.predict_class_probabilities(X)
1063
+
1064
+
1065
class APLRTuner:
    """Grid search over APLR hyperparameters, scored by cross-validation error.

    ``parameters`` is either one mapping from constructor-argument name to the
    list of values to try, or a list of such mappings. Each mapping is expanded
    into the Cartesian product of its value lists; for a list of mappings the
    expansions are concatenated in the order given.
    """

    def __init__(
        self,
        parameters: Union[Dict[str, List[float]], List[Dict[str, List[float]]]] = {
            "max_interaction_level": [0, 1],
            "min_observations_in_split": [4, 10, 20, 100, 500, 1000],
        },
        is_regressor: bool = True,
    ):
        """Store the search space and expand it into ``parameter_grid``.

        Args:
            parameters: One mapping, or a list of mappings, from parameter
                name to candidate values.
            is_regressor: When truthy ``fit`` builds ``APLRRegressor`` models,
                otherwise ``APLRClassifier`` models.
        """
        # NOTE: the default dict is a single shared object; this class only
        # reads it and never mutates it.
        self.parameters = parameters
        self.is_regressor = is_regressor
        self.parameter_grid = self._create_parameter_grid()

    def _create_parameter_grid(self) -> List[Dict[str, float]]:
        """Expand ``self.parameters`` into a list of keyword-argument dicts.

        Within one mapping the keys are sorted and every combination of the
        value lists is produced. An empty mapping produces one empty dict
        (i.e. a model built with default arguments).
        """
        # Anything exposing ``.items()`` is treated as a single mapping;
        # otherwise ``parameters`` is iterated as a sequence of mappings.
        if hasattr(self.parameters, "items"):
            sub_grids = [self.parameters]
        else:
            sub_grids = self.parameters

        grid: List[Dict[str, float]] = []
        for sub_grid in sub_grids:
            items = sorted(sub_grid.items())
            keys = [key for key, _ in items]
            value_lists = [values for _, values in items]
            grid.extend(
                dict(zip(keys, combination))
                for combination in itertools.product(*value_lists)
            )
        return grid

    def fit(self, X: Union[pd.DataFrame, FloatMatrix], y: FloatVector, **kwargs):
        """Fit one model per grid entry and remember the best one.

        Extra keyword arguments are forwarded to each model's ``fit``. After
        the loop ``cv_results`` holds one dict per model (its ``get_params()``
        plus a ``"cv_error"`` entry), sorted by ascending ``cv_error``.
        ``best_model`` is only assigned when a model reports a cv error that
        is strictly below every earlier one, starting from infinity.
        """
        self.cv_results: List[Dict[str, float]] = []
        best_validation_result = np.inf
        for params in self.parameter_grid:
            if self.is_regressor:
                model = APLRRegressor(**params)
            else:
                model = APLRClassifier(**params)
            model.fit(X, y, **kwargs)
            cv_error_for_this_model = model.get_cv_error()
            cv_results_for_this_model = model.get_params()
            cv_results_for_this_model["cv_error"] = cv_error_for_this_model
            self.cv_results.append(cv_results_for_this_model)
            if cv_error_for_this_model < best_validation_result:
                best_validation_result = cv_error_for_this_model
                self.best_model = model
        self.cv_results = sorted(self.cv_results, key=lambda x: x["cv_error"])

    def predict(
        self, X: Union[pd.DataFrame, FloatMatrix], **kwargs
    ) -> Union[FloatVector, List[str]]:
        """Predict with the best model found by ``fit``."""
        return self.best_model.predict(X, **kwargs)

    def predict_class_probabilities(
        self, X: Union[pd.DataFrame, FloatMatrix], **kwargs
    ) -> FloatMatrix:
        """Return class probabilities from the best model.

        Raises:
            TypeError: If the tuner was configured as a regressor.
        """
        # Same truthiness test as in fit(), so the model type that was built
        # and the check made here cannot disagree.
        if self.is_regressor:
            raise TypeError(
                "predict_class_probabilities is only possible when is_regressor is False"
            )
        return self.best_model.predict_class_probabilities(X, **kwargs)

    def predict_proba(
        self, X: Union[pd.DataFrame, FloatMatrix], **kwargs
    ) -> FloatMatrix:
        """Alias of ``predict_class_probabilities``."""
        return self.predict_class_probabilities(X, **kwargs)

    def get_best_estimator(self) -> Union[APLRClassifier, APLRRegressor]:
        """Return the best model found by ``fit``."""
        return self.best_model

    def get_cv_results(self) -> List[Dict[str, float]]:
        """Return the per-model results collected by ``fit``."""
        return self.cv_results
@@ -0,0 +1,34 @@
1
+ Metadata-Version: 2.4
2
+ Name: aplr
3
+ Version: 10.19.2
4
+ Summary: Automatic Piecewise Linear Regression
5
+ Home-page: https://github.com/ottenbreit-data-science/aplr
6
+ Author: Mathias von Ottenbreit
7
+ Author-email: ottenbreitdatascience@gmail.com
8
+ License: MIT
9
+ Platform: Windows
10
+ Platform: Linux
11
+ Platform: MacOS
12
+ Classifier: License :: OSI Approved :: MIT License
13
+ Requires-Python: >=3.8
14
+ Description-Content-Type: text/markdown
15
+ License-File: LICENSE
16
+ Requires-Dist: numpy>=1.11
17
+ Requires-Dist: pandas>=1.0.0
18
+ Provides-Extra: plots
19
+ Requires-Dist: matplotlib>=3.0; extra == "plots"
20
+ Dynamic: author
21
+ Dynamic: author-email
22
+ Dynamic: classifier
23
+ Dynamic: description
24
+ Dynamic: description-content-type
25
+ Dynamic: home-page
26
+ Dynamic: license
27
+ Dynamic: license-file
28
+ Dynamic: platform
29
+ Dynamic: provides-extra
30
+ Dynamic: requires-dist
31
+ Dynamic: requires-python
32
+ Dynamic: summary
33
+
34
+ The documentation for Automatic Piecewise Linear Regression is available at [https://github.com/ottenbreit-data-science/aplr](https://github.com/ottenbreit-data-science/aplr).
@@ -0,0 +1,8 @@
1
+ aplr_cpp.cpython-313-darwin.so,sha256=W21qjozcY93tnGSaTMBesj0Ntn7V27Zcv8VlZsLD5Dc,1339856
2
+ aplr-10.19.2.dist-info/RECORD,,
3
+ aplr-10.19.2.dist-info/WHEEL,sha256=g90Wq7X-bVdwIvEI8m8JyslasfeTxTaPRr_jHMRBUD8,137
4
+ aplr-10.19.2.dist-info/top_level.txt,sha256=DXVC0RIFGpzVnPeKWAZTXQdJheOEZL51Wip6Fx7zbR4,14
5
+ aplr-10.19.2.dist-info/METADATA,sha256=m3Yb7C36RKGeJSLVe6DzWAf5lGMNRFaQYyrPQ7rDXSU,1014
6
+ aplr-10.19.2.dist-info/licenses/LICENSE,sha256=g4qcQtkSVPHtGRi3T93DoFCrssvW6ij_emU-2fj_xfY,1113
7
+ aplr/__init__.py,sha256=rRfTgNWnYZlFatyA920lWqBcjwmQUI7FcvEPFUTJgzE,20
8
+ aplr/aplr.py,sha256=Pv_6dSaZ7WIbP6vzzB6-R8S0VLcKvlVJyP0RBToDZKw,48320
@@ -0,0 +1,6 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (80.9.0)
3
+ Root-Is-Purelib: false
4
+ Tag: cp313-cp313-macosx_11_0_x86_64
5
+ Generator: delocate 0.13.0
6
+
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2022 Mathias von Ottenbreit <ottenbreitdatascience@gmail.com>
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,2 @@
1
+ aplr
2
+ aplr_cpp
Binary file