PyPI - linearrf - Versions diffs - 1.2.2__tar.gz → 1.2.4__tar.gz - Mend

linearrf 1.2.2.tar.gz → 1.2.4.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19)

{linearrf-1.2.2/src/linearrf.egg-info → linearrf-1.2.4}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: linearrf
-Version: 1.2.2
+Version: 1.2.4
 Summary: A python libary to build Random Forests with Linear Models at the leaves.
 Author-email: Marian Biermann <marianbiermann@gmx.de>
 Project-URL: homepage, https://github.com/marianbiermann/lrf

{linearrf-1.2.2 → linearrf-1.2.4}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "linearrf"
-version = "1.2.2"
+version = "1.2.4"
 description = "A python libary to build Random Forests with Linear Models at the leaves."
 readme = "README.md"
 authors = [{ name = "Marian Biermann", email = "marianbiermann@gmx.de" }]

{linearrf-1.2.2 → linearrf-1.2.4/src/linearrf.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: linearrf
-Version: 1.2.2
+Version: 1.2.4
 Summary: A python libary to build Random Forests with Linear Models at the leaves.
 Author-email: Marian Biermann <marianbiermann@gmx.de>
 Project-URL: homepage, https://github.com/marianbiermann/lrf

{linearrf-1.2.2 → linearrf-1.2.4}/src/lrf/_criterion.py RENAMED Viewed

@@ -82,43 +82,61 @@ def neg_mcc(y_true: np.ndarray, y_pred: np.ndarray):
 def neg_roc_auc(y_true: np.ndarray, y_pred: np.ndarray):
-    # Rank-based ROC AUC in O(n log n). Sort by descending score and sweep:
-    # each step adds the next sample to "predicted positive", so cumulative
-    # positives / negatives give TPR / FPR at every distinct cutoff.
+    # Rank-based ROC AUC in O(n log n). Sort by descending score, then evaluate
+    # TPR/FPR only at distinct-score endpoints — collapsing each tied group
+    # into a single point. Without this, tied predictions produce results that
+    # depend on the (arbitrary) within-tie sort order.
     y_true = np.asarray(y_true)
     y_pred = np.asarray(y_pred)
     order = np.argsort(-y_pred, kind='stable')
     y_sorted = y_true[order].astype(np.float64)
+    pred_sorted = y_pred[order]
-    positives = y_sorted.sum()
-    negatives = y_sorted.size - positives
+    tps_full = np.cumsum(y_sorted)
+    fps_full = np.arange(1, y_sorted.size + 1, dtype=np.float64) - tps_full
+    # End of each tied group: positions where the next prediction differs,
+    # plus the very last position.
+    endpoints = np.concatenate((np.where(np.diff(pred_sorted))[0],
+                                [y_sorted.size - 1]))
+    tps = tps_full[endpoints]
+    fps = fps_full[endpoints]
+    positives, negatives = tps[-1], fps[-1]
     if positives == 0 or negatives == 0:
         return 0.0
-    tpr = np.concatenate(([0.0], np.cumsum(y_sorted) / positives))
-    fpr = np.concatenate(([0.0], np.cumsum(1.0 - y_sorted) / negatives))
+    tpr = np.concatenate(([0.0], tps / positives))
+    fpr = np.concatenate(([0.0], fps / negatives))
     # negated so the caller minimizes AUC (matches the previous convention)
     return -_trapz(tpr, fpr)
 def neg_pr_auc(y_true: np.ndarray, y_pred: np.ndarray):
-    # Rank-based PR AUC in O(n log n) — same idea as neg_roc_auc above.
+    # Rank-based PR AUC in O(n log n). Same tie-collapsing as neg_roc_auc.
     y_true = np.asarray(y_true)
     y_pred = np.asarray(y_pred)
     order = np.argsort(-y_pred, kind='stable')
     y_sorted = y_true[order].astype(np.float64)
+    pred_sorted = y_pred[order]
+    tps_full = np.cumsum(y_sorted)
+    fps_full = np.arange(1, y_sorted.size + 1, dtype=np.float64) - tps_full
+    endpoints = np.concatenate((np.where(np.diff(pred_sorted))[0],
+                                [y_sorted.size - 1]))
+    tps = tps_full[endpoints]
+    fps = fps_full[endpoints]
-    positives = y_sorted.sum()
+    positives = tps[-1]
     if positives == 0:
         return 0.0
-    pos_cum = np.cumsum(y_sorted)
-    ranks = np.arange(1, y_sorted.size + 1, dtype=np.float64)
-    precision = pos_cum / ranks
-    recall = pos_cum / positives
+    precision = tps / (tps + fps)
+    recall = tps / positives
     return -_trapz(precision, recall)

{linearrf-1.2.2 → linearrf-1.2.4}/src/lrf/lrf.py RENAMED Viewed

@@ -21,6 +21,9 @@ class LRFRegressor(_LinearRandomForest):
         self.preprocessing = preprocessing
         self._estimator_type = 'regressor'
+        if criterion not in ('mse', 'rmse', 'mae', 'mape', 'wape', 'neg_explained_variance', 'neg_r2'):
+            print(' Metric "{}" is not implemented, MSE is used instead.'.format(criterion))
         if linear_model is None:
             linear_model = Regressor(alpha=self.alpha, preprocessing=self.preprocessing, intercept_in_input=True)
         else:
@@ -83,7 +86,6 @@ class LRFRegressor(_LinearRandomForest):
         elif self.criterion == 'neg_r2':
             val = neg_r2(y_true=y_true, y_pred=y_pred)
         else:
-            print(' Metric "{}" is not implemented, MSE is used instead.'.format(self.criterion))
             val = mse(y_true=y_true, y_pred=y_pred)
         return val
@@ -114,6 +116,10 @@ class LRFClassifier(_LinearRandomForest):
         self.preprocessing = preprocessing
         self._estimator_type = 'classifier'
+        if criterion not in ('hamming', 'cross_entropy', 'neg_mcc', 'neg_roc_auc', 'neg_pr_auc'):
+            print(' Metric "{}" is not implemented, negative Matthews Correlation Coefficient is used instead.'
+                  .format(criterion))
         if linear_model is None:
             linear_model = Classifier(C=self.C, preprocessing=self.preprocessing, intercept_in_input=True)
         else:
@@ -157,7 +163,8 @@ class LRFClassifier(_LinearRandomForest):
             x = x.to_numpy()
         # add intercept here and not inside linear model for performance reasons
-        x = np.insert(x, 0, 1, axis=1)
+        if self.linear_model is None:
+            x = np.insert(x, 0, 1, axis=1)
         # add columns with row index for sorting after multiprocessing
         x = np.insert(x, 0, np.arange(x.shape[0]), axis=1)
@@ -213,8 +220,6 @@ class LRFClassifier(_LinearRandomForest):
         elif self.criterion == 'neg_roc_auc':
             val = neg_roc_auc(y_true=y_true, y_pred=y_pred)
         else:
-            print(' Metric "{}" is not implemented, the negative Matthews Correlation Coefficient is used '
-                  'instead.'.format(self.criterion))
             val = neg_mcc(y_true=y_true, y_pred=y_pred)
         return val