cvmatrix 2.0.0__tar.gz → 2.0.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cvmatrix-2.0.0 → cvmatrix-2.0.2}/PKG-INFO +2 -2
- {cvmatrix-2.0.0 → cvmatrix-2.0.2}/README.md +1 -1
- cvmatrix-2.0.2/cvmatrix/__init__.py +1 -0
- {cvmatrix-2.0.0 → cvmatrix-2.0.2}/cvmatrix/cvmatrix.py +22 -56
- {cvmatrix-2.0.0 → cvmatrix-2.0.2}/pyproject.toml +1 -1
- cvmatrix-2.0.0/cvmatrix/__init__.py +0 -1
- {cvmatrix-2.0.0 → cvmatrix-2.0.2}/LICENSE +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: cvmatrix
|
|
3
|
-
Version: 2.0.0
|
|
3
|
+
Version: 2.0.2
|
|
4
4
|
Summary: Fast computation of possibly weighted and possibly centered/scaled training set kernel matrices in a cross-validation setting.
|
|
5
5
|
License: Apache-2.0
|
|
6
6
|
Author: Sm00thix
|
|
@@ -118,7 +118,7 @@ Guidelines](https://github.com/Sm00thix/CVMatrix/blob/main/CONTRIBUTING.md).
|
|
|
118
118
|
|
|
119
119
|
1. [Engstrøm, O.-C. G. and Jensen, M. H. (2025). Fast partition-based cross-validation with centering and scaling for $\mathbf{X}^\mathbf{T}\mathbf{X}$ and $\mathbf{X}^\mathbf{T}\mathbf{Y}$. *Journal of Chemometrics*, 39(3).](https://doi.org/10.1002/cem.70008)
|
|
120
120
|
2. [Dayal, B. S. and MacGregor, J. F. (1997). Improved PLS algorithms. *Journal of Chemometrics*, 11(1), 73-85.](https://doi.org/10.1002/(SICI)1099-128X(199701)11:1%3C73::AID-CEM435%3E3.0.CO;2-%23?)
|
|
121
|
-
3. [Engstrøm, O.-C. G. and Dreier, E. S. and Jespersen, B. M. and Pedersen, K. S. IKPLS: Improved Kernel Partial Least Squares and Fast Cross-Validation Algorithms for Python with CPU and GPU Implementations Using NumPy and JAX. *Journal of Open Source Software*, 9(99).](https://doi.org/10.21105/joss.06533)
|
|
121
|
+
3. [Engstrøm, O.-C. G. and Dreier, E. S. and Jespersen, B. M. and Pedersen, K. S. (2024). IKPLS: Improved Kernel Partial Least Squares and Fast Cross-Validation Algorithms for Python with CPU and GPU Implementations Using NumPy and JAX. *Journal of Open Source Software*, 9(99).](https://doi.org/10.21105/joss.06533)
|
|
122
122
|
|
|
123
123
|
## Funding
|
|
124
124
|
- Up until May 31st 2025, this work has been carried out as part of an industrial Ph. D. project receiving funding from [FOSS Analytical A/S](https://www.fossanalytics.com/) and [The Innovation Fund Denmark](https://innovationsfonden.dk/en). Grant number 1044-00108B.
|
|
@@ -96,7 +96,7 @@ Guidelines](https://github.com/Sm00thix/CVMatrix/blob/main/CONTRIBUTING.md).
|
|
|
96
96
|
|
|
97
97
|
1. [Engstrøm, O.-C. G. and Jensen, M. H. (2025). Fast partition-based cross-validation with centering and scaling for $\mathbf{X}^\mathbf{T}\mathbf{X}$ and $\mathbf{X}^\mathbf{T}\mathbf{Y}$. *Journal of Chemometrics*, 39(3).](https://doi.org/10.1002/cem.70008)
|
|
98
98
|
2. [Dayal, B. S. and MacGregor, J. F. (1997). Improved PLS algorithms. *Journal of Chemometrics*, 11(1), 73-85.](https://doi.org/10.1002/(SICI)1099-128X(199701)11:1%3C73::AID-CEM435%3E3.0.CO;2-%23?)
|
|
99
|
-
3. [Engstrøm, O.-C. G. and Dreier, E. S. and Jespersen, B. M. and Pedersen, K. S. IKPLS: Improved Kernel Partial Least Squares and Fast Cross-Validation Algorithms for Python with CPU and GPU Implementations Using NumPy and JAX. *Journal of Open Source Software*, 9(99).](https://doi.org/10.21105/joss.06533)
|
|
99
|
+
3. [Engstrøm, O.-C. G. and Dreier, E. S. and Jespersen, B. M. and Pedersen, K. S. (2024). IKPLS: Improved Kernel Partial Least Squares and Fast Cross-Validation Algorithms for Python with CPU and GPU Implementations Using NumPy and JAX. *Journal of Open Source Software*, 9(99).](https://doi.org/10.21105/joss.06533)
|
|
100
100
|
|
|
101
101
|
## Funding
|
|
102
102
|
- Up until May 31st 2025, this work has been carried out as part of an industrial Ph. D. project receiving funding from [FOSS Analytical A/S](https://www.fossanalytics.com/) and [The Innovation Fund Denmark](https://innovationsfonden.dk/en). Grant number 1044-00108B.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "2.0.2"
|
|
@@ -99,8 +99,6 @@ class CVMatrix:
|
|
|
99
99
|
self.N = None
|
|
100
100
|
self.K = None
|
|
101
101
|
self.M = None
|
|
102
|
-
self.X_total_mean = None
|
|
103
|
-
self.Y_total_mean = None
|
|
104
102
|
self.XTX_total = None
|
|
105
103
|
self.XTY_total = None
|
|
106
104
|
self.sum_X_total = None
|
|
@@ -146,7 +144,6 @@ class CVMatrix:
|
|
|
146
144
|
ValueError
|
|
147
145
|
If `weights` is provided and contains negative values.
|
|
148
146
|
"""
|
|
149
|
-
|
|
150
147
|
self._init_mats(X, Y, weights)
|
|
151
148
|
self._init_weighted_mats()
|
|
152
149
|
self._init_matrix_products()
|
|
@@ -353,26 +350,19 @@ class CVMatrix:
|
|
|
353
350
|
"The number of non-zero weights in the training set must be "
|
|
354
351
|
"greater than zero."
|
|
355
352
|
)
|
|
356
|
-
sum_w_total_over_sum_w_train = self.sum_w_total / sum_w_train
|
|
357
|
-
sum_w_val_over_sum_w_train = sum_w_val / sum_w_train
|
|
358
|
-
|
|
359
353
|
if self.center_X or self.scale_X or (return_XTY and self.center_Y):
|
|
360
354
|
sum_X_val = np.sum(X_val, axis=0, keepdims=True)
|
|
361
355
|
X_train_mean = self._compute_training_mat_mean(
|
|
362
356
|
sum_X_val,
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
sum_w_total_over_sum_w_train,
|
|
366
|
-
sum_w_val_over_sum_w_train,
|
|
357
|
+
self.sum_X_total,
|
|
358
|
+
sum_w_train,
|
|
367
359
|
)
|
|
368
360
|
if return_XTY and (self.center_X or self.center_Y or self.scale_Y):
|
|
369
361
|
sum_Y_val = np.sum(Y_val, axis=0, keepdims=True)
|
|
370
362
|
Y_train_mean = self._compute_training_mat_mean(
|
|
371
363
|
sum_Y_val,
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
sum_w_total_over_sum_w_train,
|
|
375
|
-
sum_w_val_over_sum_w_train,
|
|
364
|
+
self.sum_Y_total,
|
|
365
|
+
sum_w_train,
|
|
376
366
|
)
|
|
377
367
|
if self.scale_X or (self.scale_Y and return_XTY):
|
|
378
368
|
divisor = self._compute_std_divisor(sum_w_train, num_nonzero_w_train)
|
|
@@ -519,10 +509,8 @@ class CVMatrix:
|
|
|
519
509
|
def _compute_training_mat_mean(
|
|
520
510
|
self,
|
|
521
511
|
sum_mat_val: np.ndarray,
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
sum_w_total_over_sum_w_train: float,
|
|
525
|
-
sum_w_val_over_sum_w_train: float,
|
|
512
|
+
sum_mat_total: np.ndarray,
|
|
513
|
+
sum_w_train: float,
|
|
526
514
|
) -> np.ndarray:
|
|
527
515
|
"""
|
|
528
516
|
Computes the row of column-wise means of a matrix for a given fold.
|
|
@@ -532,34 +520,18 @@ class CVMatrix:
|
|
|
532
520
|
sum_mat_val : Array of shape (1, K) or (1, M)
|
|
533
521
|
The row of column-wise sums of validation set of `Xw` or `Yw`.
|
|
534
522
|
|
|
535
|
-
|
|
536
|
-
The
|
|
537
|
-
|
|
538
|
-
mat_total_mean : Array of shape (1, K) or (1, M)
|
|
539
|
-
The row of column-wise weighted means of the total matrix.
|
|
540
|
-
|
|
541
|
-
sum_w_total_over_sum_w_train : float
|
|
542
|
-
The ratio of the sum of weights in the entire dataset to the sum of weights
|
|
543
|
-
in the training set.
|
|
544
|
-
|
|
545
|
-
sum_w_val_over_sum_w_train : float
|
|
546
|
-
The ratio of the sum of weights in the validation set to the sum of weights
|
|
547
|
-
in the training set.
|
|
523
|
+
sum_mat_total : Array of shape (1, K) or (1, M)
|
|
524
|
+
The row of column-wise sums of the total `Xw` or `Yw`.
|
|
548
525
|
|
|
549
|
-
|
|
550
|
-
The sum of weights in the
|
|
526
|
+
sum_w_train : float
|
|
527
|
+
The sum of weights in the training set.
|
|
551
528
|
|
|
552
529
|
Returns
|
|
553
530
|
-------
|
|
554
531
|
Array of shape (1, K) or (1, M)
|
|
555
532
|
The row of column-wise means of the training set matrix.
|
|
556
533
|
"""
|
|
557
|
-
|
|
558
|
-
if sum_w_val <= self.eps:
|
|
559
|
-
return train_part_contribution
|
|
560
|
-
return train_part_contribution - sum_w_val_over_sum_w_train * (
|
|
561
|
-
sum_mat_val / sum_w_val
|
|
562
|
-
)
|
|
534
|
+
return (sum_mat_total - sum_mat_val) / sum_w_train
|
|
563
535
|
|
|
564
536
|
def _compute_std_divisor(
|
|
565
537
|
self, sum_w_train: float, num_nonzero_w_train: int
|
|
@@ -745,33 +717,28 @@ class CVMatrix:
|
|
|
745
717
|
"""
|
|
746
718
|
Initializes the global statistics for `X` and `Y`.
|
|
747
719
|
"""
|
|
748
|
-
if self.
|
|
749
|
-
self.
|
|
750
|
-
|
|
751
|
-
|
|
752
|
-
|
|
753
|
-
|
|
720
|
+
if self.center_X or self.center_Y or self.scale_X or self.scale_Y:
|
|
721
|
+
if self.w_total is not None:
|
|
722
|
+
self.sum_w_total = np.sum(self.w_total)
|
|
723
|
+
self.num_nonzero_w_total = np.count_nonzero(self.w_total)
|
|
724
|
+
else:
|
|
725
|
+
self.sum_w_total = self.N
|
|
726
|
+
self.num_nonzero_w_total = self.N
|
|
754
727
|
if self.center_X or self.center_Y or self.scale_X:
|
|
755
728
|
self.sum_X_total = np.sum(self.Xw_total, axis=0, keepdims=True)
|
|
756
|
-
self.X_total_mean = self.sum_X_total / self.sum_w_total
|
|
757
|
-
else:
|
|
758
|
-
self.X_total_mean = None
|
|
759
729
|
if (
|
|
760
730
|
self.center_X or self.center_Y or self.scale_Y
|
|
761
731
|
) and self.Y_total is not None:
|
|
762
732
|
self.sum_Y_total = np.sum(self.Yw_total, axis=0, keepdims=True)
|
|
763
|
-
self.Y_total_mean = self.sum_Y_total / self.sum_w_total
|
|
764
|
-
else:
|
|
765
|
-
self.Y_total_mean = None
|
|
766
733
|
if self.scale_X:
|
|
767
|
-
self.sum_sq_X_total = np.
|
|
768
|
-
|
|
734
|
+
self.sum_sq_X_total = np.sum(
|
|
735
|
+
self.Xw_total * self.X_total, axis=0, keepdims=True
|
|
769
736
|
)
|
|
770
737
|
else:
|
|
771
738
|
self.sum_sq_X_total = None
|
|
772
739
|
if self.scale_Y and self.Y_total is not None:
|
|
773
|
-
self.sum_sq_Y_total = np.
|
|
774
|
-
|
|
740
|
+
self.sum_sq_Y_total = np.sum(
|
|
741
|
+
self.Yw_total * self.Y_total, axis=0, keepdims=True
|
|
775
742
|
)
|
|
776
743
|
else:
|
|
777
744
|
self.sum_sq_Y_total = None
|
|
@@ -788,7 +755,6 @@ class CVMatrix:
|
|
|
788
755
|
An iterable defining cross-validation splits. Each unique value in
|
|
789
756
|
`folds` corresponds to a different fold.
|
|
790
757
|
"""
|
|
791
|
-
|
|
792
758
|
folds_dict: "defaultdict[Hashable, list[int]]" = defaultdict(list)
|
|
793
759
|
for i, num in enumerate(folds):
|
|
794
760
|
folds_dict[num].append(i)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "cvmatrix"
|
|
3
|
-
version = "2.0.0"
|
|
3
|
+
version = "2.0.2"
|
|
4
4
|
description = "Fast computation of possibly weighted and possibly centered/scaled training set kernel matrices in a cross-validation setting."
|
|
5
5
|
authors = ["Sm00thix <oleemail@icloud.com>"]
|
|
6
6
|
maintainers = ["Sm00thix <oleemail@icloud.com>"]
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "2.0.0"
|
|
File without changes
|