aplr 10.19.2__cp311-cp311-win_amd64.whl → 10.20.0__cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
aplr/aplr.py CHANGED
@@ -1,4 +1,4 @@
1
- from typing import List, Callable, Optional, Dict, Union, Tuple
1
+ from typing import List, Callable, Optional, Dict, Union
2
2
  import numpy as np
3
3
  import pandas as pd
4
4
  import aplr_cpp
@@ -10,241 +10,57 @@ IntVector = np.ndarray
10
10
  IntMatrix = np.ndarray
11
11
 
12
12
 
13
- class BaseAPLR:
14
- def _preprocess_X_fit(
15
- self,
16
- X: Union[pd.DataFrame, FloatMatrix],
17
- X_names: List[str],
18
- sample_weight: FloatVector,
19
- ) -> Tuple[FloatMatrix, List[str]]:
20
- if sample_weight.size > 0:
21
- if sample_weight.ndim != 1:
22
- raise ValueError("sample_weight must be a 1D array.")
23
- if len(sample_weight) != X.shape[0]:
24
- raise ValueError(
25
- "sample_weight must have the same number of rows as X."
26
- )
27
- if np.any(np.isnan(sample_weight)) or np.any(np.isinf(sample_weight)):
28
- raise ValueError("sample_weight cannot contain nan or infinite values.")
29
- if np.any(sample_weight < 0):
30
- raise ValueError("sample_weight cannot contain negative values.")
31
-
32
- self._fit_preprocessor(X, X_names, sample_weight)
33
-
34
- X = self._transform_X(X)
35
-
36
- return X.to_numpy(dtype=np.float64), list(X.columns)
37
-
38
- def _preprocess_X_predict(self, X: Union[pd.DataFrame, FloatMatrix]) -> FloatMatrix:
39
- X = self._transform_X(X)
40
- return X.to_numpy(dtype=np.float64)
41
-
42
- def _fit_preprocessor(
43
- self,
44
- X: Union[pd.DataFrame, FloatMatrix],
45
- X_names: List[str],
46
- sample_weight: FloatVector,
47
- ) -> None:
48
- """Learns transformations from the training data and sets preprocessor state."""
49
- X = self._convert_input_to_dataframe_for_fit(X, X_names=X_names)
50
- self.X_names_ = list(X.columns)
51
- self.categorical_features_ = list(
52
- X.select_dtypes(include=["category", "object"]).columns
53
- )
54
-
55
- self._fit_one_hot_encoding(X)
56
- self._fit_missing_indicators(X)
57
-
58
- # Learn median values for imputation from the original data.
59
- self.median_values_ = {}
60
- numeric_cols_for_median = [
61
- col for col in X.columns if col not in self.categorical_features_
62
- ]
63
- for col in numeric_cols_for_median:
64
- missing_mask = X[col].isnull()
65
- if sample_weight.size > 0:
66
- valid_indices = ~missing_mask
67
- col_data = X.loc[valid_indices, col]
68
- col_weights = sample_weight[valid_indices]
69
- if col_data.empty:
70
- median_val = 0
71
- else:
72
- col_data_np = col_data.to_numpy()
73
- sort_indices = np.argsort(col_data_np, kind="stable")
74
- sorted_data = col_data_np[sort_indices]
75
- sorted_weights = col_weights[sort_indices]
76
- cumulative_weights = np.cumsum(sorted_weights)
77
- total_weight = cumulative_weights[-1]
78
- median_weight_index = np.searchsorted(
79
- cumulative_weights, total_weight / 2.0
80
- )
81
- if median_weight_index >= len(sorted_data):
82
- median_weight_index = len(sorted_data) - 1
83
- median_val = sorted_data[median_weight_index]
84
- else:
85
- if X[col].isnull().all():
86
- median_val = 0
87
- else:
88
- median_val = X[col].median()
89
-
90
- if pd.isna(median_val):
91
- median_val = 0
92
- self.median_values_[col] = median_val
93
-
94
- # Determine the final column names after all transformations.
95
- final_cols = []
96
- if self.ohe_columns_:
97
- final_cols.extend(self.ohe_columns_)
13
+ def _dataframe_to_cpp_dataframe(df: pd.DataFrame) -> aplr_cpp.CppDataFrame:
14
+ """Converts a pandas DataFrame to a CppDataFrame."""
15
+ cpp_df = aplr_cpp.CppDataFrame()
16
+ for col_name in df.columns:
17
+ col = df[col_name]
18
+ if pd.api.types.is_numeric_dtype(col.dtype):
19
+ # Convert numeric columns to std::vector<double>
20
+ # NaNs are preserved and handled in C++
21
+ cpp_df.add_numeric_column(
22
+ col_name, col.to_numpy(dtype=np.float64, na_value=np.nan)
23
+ )
24
+ elif (
25
+ isinstance(col.dtype, pd.CategoricalDtype)
26
+ or pd.api.types.is_object_dtype(col.dtype)
27
+ or pd.api.types.is_string_dtype(col.dtype)
28
+ ):
29
+ # Convert categorical/object/string columns to std::vector<std::string>
30
+ # Missing values (None, np.nan) are converted to empty strings for C++ handling
31
+ cpp_df.add_categorical_column(col_name, col.astype(str).fillna("").tolist())
98
32
  else:
99
- final_cols.extend(self.X_names_)
100
- final_cols.extend([col + "_missing" for col in self.na_imputed_cols_])
101
- self.final_training_columns_ = final_cols
102
-
103
- def _fit_one_hot_encoding(self, X: pd.DataFrame) -> None:
104
- """Learns the complete set of columns that will exist after one-hot encoding."""
105
- if not self.categorical_features_:
106
- return
107
- self.ohe_columns_ = list(
108
- pd.get_dummies(
109
- X, columns=self.categorical_features_, dummy_na=False
110
- ).columns
111
- )
112
-
113
- def _fit_missing_indicators(self, X: pd.DataFrame) -> None:
114
- """Learns which columns will have missing indicators added."""
115
- self.na_imputed_cols_ = [col for col in X.columns if X[col].isnull().any()]
116
-
117
- def _transform_X(self, X: Union[pd.DataFrame, FloatMatrix]) -> pd.DataFrame:
118
- """Transforms data using the fitted preprocessor attributes."""
119
- X = self._convert_input_to_dataframe_for_transform(X)
120
- X = self._transform_one_hot_encoding(X)
121
-
122
- # Just-in-time copy to avoid modifying user's original data.
123
- # A copy is needed if we are about to perform in-place modifications
124
- # (adding missing indicators or filling NaNs) and a copy hasn't already
125
- # been made by one-hot encoding.
126
- if not self.categorical_features_ and X.isnull().to_numpy().any():
127
- X = X.copy()
128
-
129
- X = self._transform_missing_indicators(X)
130
-
131
- for col, val in self.median_values_.items():
132
- if col in X.columns:
133
- X[col] = X[col].fillna(val)
134
-
135
- # Enforce final column order and add missing columns if necessary
136
- if self.final_training_columns_:
137
- missing_final_cols = set(self.final_training_columns_) - set(X.columns)
138
- for c in missing_final_cols:
139
- X[c] = 0
140
- if not X.columns.equals(pd.Index(self.final_training_columns_)):
141
- X = X.reindex(columns=self.final_training_columns_, copy=False)
142
-
143
- return X
144
-
145
- def _transform_one_hot_encoding(self, X: pd.DataFrame) -> pd.DataFrame:
146
- """Applies one-hot encoding using learned OHE columns during transformation."""
147
- if not self.categorical_features_:
148
- return X
149
-
150
- X = pd.get_dummies(X, columns=self.categorical_features_, dummy_na=False)
151
- # Handle missing OHE columns (categories not seen in new data)
152
- missing_cols = set(self.ohe_columns_) - set(X.columns)
153
- for c in missing_cols:
154
- X[c] = 0
155
- # Ensure column order
156
- if not X.columns.equals(pd.Index(self.ohe_columns_)):
157
- X = X.reindex(columns=self.ohe_columns_, copy=False)
158
- return X
159
-
160
- def _transform_missing_indicators(self, X: pd.DataFrame) -> pd.DataFrame:
161
- """Adds _missing indicator columns for features with NaNs during transformation."""
162
- if not self.na_imputed_cols_:
163
- return X
164
- # Only add indicators for columns that were imputed during fit and are currently missing data.
165
- for col in self.na_imputed_cols_:
166
- if col in X.columns and X[col].isnull().any():
167
- X[col + "_missing"] = X[col].isnull().astype(int)
168
- return X
169
-
170
- def _convert_input_to_dataframe_for_fit(
171
- self,
172
- X: Union[pd.DataFrame, FloatMatrix],
173
- X_names: Optional[List[str]] = None,
174
- ) -> pd.DataFrame:
175
- """Converts input X to a pandas DataFrame for fitting, handling column names."""
176
- X, was_converted = self._to_dataframe(X)
177
- if was_converted:
178
- if X_names:
179
- X.columns = list(X_names)
180
- else:
181
- X.columns = [f"X{i}" for i in range(X.shape[1])]
182
- return X
183
-
184
- def _convert_input_to_dataframe_for_transform(
185
- self, X: Union[pd.DataFrame, FloatMatrix]
186
- ) -> pd.DataFrame:
187
- """Converts input X to a pandas DataFrame for transformation, aligning columns."""
188
- X, was_converted = self._to_dataframe(X)
189
- if was_converted:
190
- if self.X_names_ and len(self.X_names_) == X.shape[1]:
191
- X.columns = self.X_names_ # Use names learned during fit
192
- else: # If X was already a DataFrame
193
- if set(X.columns) != set(self.X_names_):
194
- raise ValueError(
195
- "Input columns for prediction do not match training columns."
33
+ raise TypeError(
34
+ f"Unsupported column type for column '{col_name}': {col.dtype}"
35
+ )
36
+ return cpp_df
37
+
38
+
39
+ def _prepare_input_data(
40
+ X: Union[pd.DataFrame, FloatMatrix], preprocess: bool
41
+ ) -> Union[aplr_cpp.CppDataFrame, FloatMatrix]:
42
+ """
43
+ Prepares the input data for the C++ backend.
44
+
45
+ If X is a pandas DataFrame, it's converted. If preprocess is True, it becomes
46
+ a CppDataFrame. If preprocess is False, it's converted to a NumPy array.
47
+ NumPy arrays are passed through as is.
48
+ """
49
+ if isinstance(X, pd.DataFrame):
50
+ if preprocess:
51
+ return _dataframe_to_cpp_dataframe(X)
52
+ else:
53
+ # Check if all columns are numeric before converting
54
+ if not all(pd.api.types.is_numeric_dtype(X[col]) for col in X.columns):
55
+ raise RuntimeError(
56
+ "Cannot convert DataFrame to matrix if it contains non-numeric columns. "
57
+ "Please ensure all columns are numeric or set preprocess=True."
196
58
  )
197
- if not X.columns.equals(pd.Index(self.X_names_)):
198
- X = X.reindex(columns=self.X_names_, copy=False)
199
- return X
59
+ return X.to_numpy(dtype=np.float64)
60
+ return X
200
61
 
201
- def _to_dataframe(
202
- self, X: Union[pd.DataFrame, FloatMatrix]
203
- ) -> Tuple[pd.DataFrame, bool]:
204
- """Converts input to a pandas DataFrame if it is not already one."""
205
- if isinstance(X, pd.DataFrame):
206
- return X, False # Was already a DataFrame
207
62
 
208
- X_numeric: np.ndarray
209
- try:
210
- # If X is already a numpy array, astype with copy=False is more efficient.
211
- # It will only copy if the dtype is different from np.float64.
212
- if isinstance(X, np.ndarray):
213
- X_numeric = X.astype(np.float64, copy=False)
214
- else:
215
- # For other array-likes (e.g., list of lists), create the array.
216
- X_numeric = np.array(X, dtype=np.float64)
217
- except (ValueError, TypeError) as e:
218
- raise TypeError("Input X must be numeric if not a pandas DataFrame.") from e
219
- return pd.DataFrame(X_numeric, copy=False), True # Was converted
220
-
221
- def __setstate__(self, state):
222
- """Handles unpickling for backward compatibility."""
223
- self.__dict__.update(state)
224
-
225
- # For backward compatibility, initialize new attributes if they don't exist,
226
- # indicating the model was trained before these features were introduced.
227
- new_attributes = {
228
- "X_names_": [],
229
- "categorical_features_": [],
230
- "ohe_columns_": [],
231
- "na_imputed_cols_": [],
232
- "median_values_": {},
233
- "final_training_columns_": [],
234
- }
235
- for attr, default_value in new_attributes.items():
236
- if not hasattr(self, attr):
237
- setattr(self, attr, default_value)
238
-
239
- def _validate_X_fit_rows(self, X):
240
- """Checks if X has enough rows to be fitted."""
241
- if (isinstance(X, np.ndarray) and X.shape[0] < 2) or (
242
- isinstance(X, pd.DataFrame) and len(X) < 2
243
- ):
244
- raise ValueError("Input X must have at least 2 rows to be fitted.")
245
-
246
-
247
- class APLRRegressor(BaseAPLR):
63
+ class APLRRegressor:
248
64
  def __init__(
249
65
  self,
250
66
  m: int = 3000,
@@ -312,6 +128,7 @@ class APLRRegressor(BaseAPLR):
312
128
  ridge_penalty: float = 0.0001,
313
129
  mean_bias_correction: bool = False,
314
130
  faster_convergence: bool = False,
131
+ preprocess: bool = True,
315
132
  ):
316
133
  self.m = m
317
134
  self.v = v
@@ -361,14 +178,7 @@ class APLRRegressor(BaseAPLR):
361
178
  self.ridge_penalty = ridge_penalty
362
179
  self.mean_bias_correction = mean_bias_correction
363
180
  self.faster_convergence = faster_convergence
364
-
365
- # Data transformations
366
- self.median_values_ = {}
367
- self.categorical_features_ = []
368
- self.ohe_columns_ = []
369
- self.na_imputed_cols_ = []
370
- self.X_names_ = []
371
- self.final_training_columns_ = []
181
+ self.preprocess = preprocess
372
182
 
373
183
  # Creating aplr_cpp and setting parameters
374
184
  self.APLRRegressor = aplr_cpp.APLRRegressor()
@@ -432,6 +242,7 @@ class APLRRegressor(BaseAPLR):
432
242
  self.APLRRegressor.ridge_penalty = self.ridge_penalty
433
243
  self.APLRRegressor.mean_bias_correction = self.mean_bias_correction
434
244
  self.APLRRegressor.faster_convergence = self.faster_convergence
245
+ self.APLRRegressor.preprocess = self.preprocess
435
246
 
436
247
  def fit(
437
248
  self,
@@ -450,16 +261,14 @@ class APLRRegressor(BaseAPLR):
450
261
  predictor_penalties_for_interactions: List[float] = [],
451
262
  predictor_min_observations_in_split: List[int] = [],
452
263
  ):
453
- self._validate_X_fit_rows(X)
454
264
  self.__set_params_cpp()
455
- X_transformed, X_names_transformed = self._preprocess_X_fit(
456
- X, X_names, sample_weight
457
- )
265
+ X = _prepare_input_data(X, self.preprocess)
266
+
458
267
  self.APLRRegressor.fit(
459
- X_transformed,
268
+ X,
460
269
  y,
461
270
  sample_weight,
462
- X_names_transformed,
271
+ X_names,
463
272
  cv_observations,
464
273
  prioritized_predictors_indexes,
465
274
  monotonic_constraints,
@@ -477,14 +286,13 @@ class APLRRegressor(BaseAPLR):
477
286
  X: Union[pd.DataFrame, FloatMatrix],
478
287
  cap_predictions_to_minmax_in_training: bool = True,
479
288
  ) -> FloatVector:
289
+ X = _prepare_input_data(X, self.preprocess)
290
+
480
291
  if self.link_function == "custom_function":
481
292
  self.APLRRegressor.calculate_custom_transform_linear_predictor_to_predictions_function = (
482
293
  self.calculate_custom_transform_linear_predictor_to_predictions_function
483
294
  )
484
- X_transformed = self._preprocess_X_predict(X)
485
- return self.APLRRegressor.predict(
486
- X_transformed, cap_predictions_to_minmax_in_training
487
- )
295
+ return self.APLRRegressor.predict(X, cap_predictions_to_minmax_in_training)
488
296
 
489
297
  def set_term_names(self, X_names: List[str]):
490
298
  self.APLRRegressor.set_term_names(X_names)
@@ -494,44 +302,40 @@ class APLRRegressor(BaseAPLR):
494
302
  X: Union[pd.DataFrame, FloatMatrix],
495
303
  sample_weight: FloatVector = np.empty(0),
496
304
  ) -> FloatVector:
497
- X_transformed = self._preprocess_X_predict(X)
498
- return self.APLRRegressor.calculate_feature_importance(
499
- X_transformed, sample_weight
500
- )
305
+ X = _prepare_input_data(X, self.preprocess)
306
+ return self.APLRRegressor.calculate_feature_importance(X, sample_weight)
501
307
 
502
308
  def calculate_term_importance(
503
309
  self,
504
310
  X: Union[pd.DataFrame, FloatMatrix],
505
311
  sample_weight: FloatVector = np.empty(0),
506
312
  ) -> FloatVector:
507
- X_transformed = self._preprocess_X_predict(X)
508
- return self.APLRRegressor.calculate_term_importance(
509
- X_transformed, sample_weight
510
- )
313
+ X = _prepare_input_data(X, self.preprocess)
314
+ return self.APLRRegressor.calculate_term_importance(X, sample_weight)
511
315
 
512
316
  def calculate_local_feature_contribution(
513
317
  self, X: Union[pd.DataFrame, FloatMatrix]
514
318
  ) -> FloatMatrix:
515
- X_transformed = self._preprocess_X_predict(X)
516
- return self.APLRRegressor.calculate_local_feature_contribution(X_transformed)
319
+ X = _prepare_input_data(X, self.preprocess)
320
+ return self.APLRRegressor.calculate_local_feature_contribution(X)
517
321
 
518
322
  def calculate_local_term_contribution(
519
323
  self, X: Union[pd.DataFrame, FloatMatrix]
520
324
  ) -> FloatMatrix:
521
- X_transformed = self._preprocess_X_predict(X)
522
- return self.APLRRegressor.calculate_local_term_contribution(X_transformed)
325
+ X = _prepare_input_data(X, self.preprocess)
326
+ return self.APLRRegressor.calculate_local_term_contribution(X)
523
327
 
524
328
  def calculate_local_contribution_from_selected_terms(
525
329
  self, X: Union[pd.DataFrame, FloatMatrix], predictor_indexes: List[int]
526
330
  ) -> FloatVector:
527
- X_transformed = self._preprocess_X_predict(X)
331
+ X = _prepare_input_data(X, self.preprocess)
528
332
  return self.APLRRegressor.calculate_local_contribution_from_selected_terms(
529
- X_transformed, predictor_indexes
333
+ X, predictor_indexes
530
334
  )
531
335
 
532
336
  def calculate_terms(self, X: Union[pd.DataFrame, FloatMatrix]) -> FloatMatrix:
533
- X_transformed = self._preprocess_X_predict(X)
534
- return self.APLRRegressor.calculate_terms(X_transformed)
337
+ X = _prepare_input_data(X, self.preprocess)
338
+ return self.APLRRegressor.calculate_terms(X)
535
339
 
536
340
  def get_term_names(self) -> List[str]:
537
341
  return self.APLRRegressor.get_term_names()
@@ -796,6 +600,7 @@ class APLRRegressor(BaseAPLR):
796
600
  "ridge_penalty": self.ridge_penalty,
797
601
  "mean_bias_correction": self.mean_bias_correction,
798
602
  "faster_convergence": self.faster_convergence,
603
+ "preprocess": self.preprocess,
799
604
  }
800
605
 
801
606
  # For sklearn
@@ -805,8 +610,15 @@ class APLRRegressor(BaseAPLR):
805
610
  self.__set_params_cpp()
806
611
  return self
807
612
 
613
+ def __setstate__(self, state):
614
+ # For backwards compatibility with older pickled models
615
+ if "preprocess" not in state:
616
+ state["preprocess"] = False
617
+ self.__dict__.update(state)
618
+ self.__set_params_cpp()
619
+
808
620
 
809
- class APLRClassifier(BaseAPLR):
621
+ class APLRClassifier:
810
622
  def __init__(
811
623
  self,
812
624
  m: int = 3000,
@@ -829,6 +641,7 @@ class APLRClassifier(BaseAPLR):
829
641
  penalty_for_interactions: float = 0.0,
830
642
  max_terms: int = 0,
831
643
  ridge_penalty: float = 0.0001,
644
+ preprocess: bool = True,
832
645
  ):
833
646
  self.m = m
834
647
  self.v = v
@@ -856,14 +669,7 @@ class APLRClassifier(BaseAPLR):
856
669
  self.penalty_for_interactions = penalty_for_interactions
857
670
  self.max_terms = max_terms
858
671
  self.ridge_penalty = ridge_penalty
859
-
860
- # Data transformations
861
- self.median_values_ = {}
862
- self.categorical_features_ = []
863
- self.ohe_columns_ = []
864
- self.na_imputed_cols_ = []
865
- self.X_names_ = []
866
- self.final_training_columns_ = []
672
+ self.preprocess = preprocess
867
673
 
868
674
  # Creating aplr_cpp and setting parameters
869
675
  self.APLRClassifier = aplr_cpp.APLRClassifier()
@@ -899,6 +705,7 @@ class APLRClassifier(BaseAPLR):
899
705
  self.APLRClassifier.penalty_for_interactions = self.penalty_for_interactions
900
706
  self.APLRClassifier.max_terms = self.max_terms
901
707
  self.APLRClassifier.ridge_penalty = self.ridge_penalty
708
+ self.APLRClassifier.preprocess = self.preprocess
902
709
 
903
710
  def fit(
904
711
  self,
@@ -915,11 +722,9 @@ class APLRClassifier(BaseAPLR):
915
722
  predictor_penalties_for_interactions: List[float] = [],
916
723
  predictor_min_observations_in_split: List[int] = [],
917
724
  ):
918
- self._validate_X_fit_rows(X)
919
725
  self.__set_params_cpp()
920
- X_transformed, X_names_transformed = self._preprocess_X_fit(
921
- X, X_names, sample_weight
922
- )
726
+
727
+ X = _prepare_input_data(X, self.preprocess)
923
728
 
924
729
  if isinstance(y, np.ndarray):
925
730
  y = y.astype(str).tolist()
@@ -927,10 +732,10 @@ class APLRClassifier(BaseAPLR):
927
732
  y = [str(val) for val in y]
928
733
 
929
734
  self.APLRClassifier.fit(
930
- X_transformed,
735
+ X,
931
736
  y,
932
737
  sample_weight,
933
- X_names_transformed,
738
+ X_names,
934
739
  cv_observations,
935
740
  prioritized_predictors_indexes,
936
741
  monotonic_constraints,
@@ -948,9 +753,10 @@ class APLRClassifier(BaseAPLR):
948
753
  X: Union[pd.DataFrame, FloatMatrix],
949
754
  cap_predictions_to_minmax_in_training: bool = False,
950
755
  ) -> FloatMatrix:
951
- X_transformed = self._preprocess_X_predict(X)
756
+ X = _prepare_input_data(X, self.preprocess)
757
+
952
758
  return self.APLRClassifier.predict_class_probabilities(
953
- X_transformed, cap_predictions_to_minmax_in_training
759
+ X, cap_predictions_to_minmax_in_training
954
760
  )
955
761
 
956
762
  def predict(
@@ -958,16 +764,15 @@ class APLRClassifier(BaseAPLR):
958
764
  X: Union[pd.DataFrame, FloatMatrix],
959
765
  cap_predictions_to_minmax_in_training: bool = False,
960
766
  ) -> List[str]:
961
- X_transformed = self._preprocess_X_predict(X)
962
- return self.APLRClassifier.predict(
963
- X_transformed, cap_predictions_to_minmax_in_training
964
- )
767
+ X = _prepare_input_data(X, self.preprocess)
768
+
769
+ return self.APLRClassifier.predict(X, cap_predictions_to_minmax_in_training)
965
770
 
966
771
  def calculate_local_feature_contribution(
967
772
  self, X: Union[pd.DataFrame, FloatMatrix]
968
773
  ) -> FloatMatrix:
969
- X_transformed = self._preprocess_X_predict(X)
970
- return self.APLRClassifier.calculate_local_feature_contribution(X_transformed)
774
+ X = _prepare_input_data(X, self.preprocess)
775
+ return self.APLRClassifier.calculate_local_feature_contribution(X)
971
776
 
972
777
  def get_categories(self) -> List[str]:
973
778
  return self.APLRClassifier.get_categories()
@@ -998,6 +803,7 @@ class APLRClassifier(BaseAPLR):
998
803
  penalty_for_interactions=self.penalty_for_interactions,
999
804
  max_terms=self.max_terms,
1000
805
  ridge_penalty=self.ridge_penalty,
806
+ preprocess=self.preprocess,
1001
807
  )
1002
808
 
1003
809
  logit_model_py.APLRRegressor = logit_model_cpp
@@ -1048,6 +854,7 @@ class APLRClassifier(BaseAPLR):
1048
854
  "penalty_for_interactions": self.penalty_for_interactions,
1049
855
  "max_terms": self.max_terms,
1050
856
  "ridge_penalty": self.ridge_penalty,
857
+ "preprocess": self.preprocess,
1051
858
  }
1052
859
 
1053
860
  # For sklearn
@@ -1061,6 +868,13 @@ class APLRClassifier(BaseAPLR):
1061
868
  def predict_proba(self, X: FloatMatrix) -> FloatMatrix:
1062
869
  return self.predict_class_probabilities(X)
1063
870
 
871
+ def __setstate__(self, state):
872
+ # For backwards compatibility with older pickled models
873
+ if "preprocess" not in state:
874
+ state["preprocess"] = False
875
+ self.__dict__.update(state)
876
+ self.__set_params_cpp()
877
+
1064
878
 
1065
879
  class APLRTuner:
1066
880
  def __init__(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: aplr
3
- Version: 10.19.2
3
+ Version: 10.20.0
4
4
  Summary: Automatic Piecewise Linear Regression
5
5
  Home-page: https://github.com/ottenbreit-data-science/aplr
6
6
  Author: Mathias von Ottenbreit
@@ -0,0 +1,8 @@
1
+ aplr_cpp.cp311-win_amd64.pyd,sha256=ecU1pYcAJ3K_B_pcYJS06DiNSMpCCovciVQWGpbWiwA,825856
2
+ aplr/__init__.py,sha256=oDFSgVytP_qQ8ilun6oHxKr-DYEeqjEQp5FciX45lls,21
3
+ aplr/aplr.py,sha256=I_LyS_uH9wmW7wE8uP6nvPhfCeeA1cQXIuLPqwT21OE,40125
4
+ aplr-10.20.0.dist-info/licenses/LICENSE,sha256=YOMo-RaL4P7edMZGD96-NskKpxyMZdP3-WiiMMmihNk,1134
5
+ aplr-10.20.0.dist-info/METADATA,sha256=sRYG3XKunuA5qB-cm-6XBqUd4XRIgK1tVriXV3PX_5Y,1048
6
+ aplr-10.20.0.dist-info/WHEEL,sha256=JLOMsP7F5qtkAkINx5UnzbFguf8CqZeraV8o04b0I8I,101
7
+ aplr-10.20.0.dist-info/top_level.txt,sha256=DXVC0RIFGpzVnPeKWAZTXQdJheOEZL51Wip6Fx7zbR4,14
8
+ aplr-10.20.0.dist-info/RECORD,,
Binary file
@@ -1,8 +0,0 @@
1
- aplr_cpp.cp311-win_amd64.pyd,sha256=jFpeuXLruiu6gB8tB7mxrTWm3Q931259woyywenX_Is,674816
2
- aplr/__init__.py,sha256=oDFSgVytP_qQ8ilun6oHxKr-DYEeqjEQp5FciX45lls,21
3
- aplr/aplr.py,sha256=Pv_6dSaZ7WIbP6vzzB6-R8S0VLcKvlVJyP0RBToDZKw,48320
4
- aplr-10.19.2.dist-info/licenses/LICENSE,sha256=YOMo-RaL4P7edMZGD96-NskKpxyMZdP3-WiiMMmihNk,1134
5
- aplr-10.19.2.dist-info/METADATA,sha256=e1-enJ5c5XtbnxCHKe-YNFq8EZAve0uOkxuiA-rKQWY,1048
6
- aplr-10.19.2.dist-info/WHEEL,sha256=JLOMsP7F5qtkAkINx5UnzbFguf8CqZeraV8o04b0I8I,101
7
- aplr-10.19.2.dist-info/top_level.txt,sha256=DXVC0RIFGpzVnPeKWAZTXQdJheOEZL51Wip6Fx7zbR4,14
8
- aplr-10.19.2.dist-info/RECORD,,
File without changes