cvmatrix 2.0.2__tar.gz → 2.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: cvmatrix
3
- Version: 2.0.2
3
+ Version: 2.1.0
4
4
  Summary: Fast computation of possibly weighted and possibly centered/scaled training set kernel matrices in a cross-validation setting.
5
5
  License: Apache-2.0
6
6
  Author: Sm00thix
@@ -0,0 +1 @@
1
+ __version__ = "2.1.0"
@@ -11,7 +11,7 @@ E-mail: ole.e@di.ku.dk
11
11
 
12
12
  from collections import defaultdict
13
13
  from collections.abc import Hashable
14
- from typing import Iterable, Union
14
+ from typing import Iterable, Tuple, Union
15
15
 
16
16
  import numpy as np
17
17
  from numpy import typing as npt
@@ -93,7 +93,7 @@ class CVMatrix:
93
93
  self.ddof = ddof
94
94
  self.dtype = dtype
95
95
  self.copy = copy
96
- self.eps = np.finfo(dtype).eps * 100
96
+ self.resolution = np.finfo(dtype).resolution * 10
97
97
  self.X_total = None
98
98
  self.Y_total = None
99
99
  self.N = None
@@ -149,10 +149,16 @@ class CVMatrix:
149
149
  self._init_matrix_products()
150
150
  self._init_total_stats()
151
151
 
152
- def training_XTX(self, fold: Hashable) -> np.ndarray:
152
+ def training_XTX(
153
+ self, fold: Hashable
154
+ ) -> Tuple[
155
+ np.ndarray, Tuple[Union[None, np.ndarray], Union[None, np.ndarray], None, None]
156
+ ]:
153
157
  """
154
- Returns the training set :math:`\mathbf{X}^{\mathbf{T}}\mathbf{W}\mathbf{X}`
155
- corresponding to every sample except those belonging to the given fold.
158
+ Computes the training set :math:`\mathbf{X}^{\mathbf{T}}\mathbf{W}\mathbf{X}`
159
+ corresponding to every sample except those belonging to the given fold. Also
160
+ computes the row of column-wise weighted means for `X` and the row of
161
+ column-wise weighted standard deviations for `X`.
156
162
 
157
163
  Parameters
158
164
  ----------
@@ -162,8 +168,13 @@ class CVMatrix:
162
168
 
163
169
  Returns
164
170
  -------
165
- Array of shape (K, K)
166
- The training set :math:`\mathbf{X}^{\mathbf{T}}\mathbf{W}\mathbf{X}`.
171
+ Tuple of two elements. The first element is an array of shape (K, K)
172
+ corresponding to the training set
173
+ :math:`\mathbf{X}^{\mathbf{T}}\mathbf{W}\mathbf{X}`. The second element is
174
+ a tuple containing the row of column-wise weighted means for `X`, the row
175
+ of column-wise weighted standard deviations for `X`, and two `None`
176
+ corresponding to the non-computed rows of column-wise weighted means and
177
+ standard deviations for `Y`. If a statistic is not computed, it is `None`.
167
178
 
168
179
  Raises
169
180
  ------
@@ -174,20 +185,34 @@ class CVMatrix:
174
185
  See Also
175
186
  --------
176
187
  training_XTY :
177
- Returns the training set
178
- :math:`\mathbf{X}^{\mathbf{T}}\mathbf{W}\mathbf{Y}`
188
+ Computes the training set
189
+ :math:`\mathbf{X}^{\mathbf{T}}\mathbf{W}\mathbf{Y}` and weighted
190
+ statistics.
179
191
  training_XTX_XTY :
180
- Returns the training set
181
- :math:`\mathbf{X}^{\mathbf{T}}\mathbf{W}\mathbf{X}` and
182
- :math:`\mathbf{X}^{\mathbf{T}}\mathbf{W}\mathbf{Y}` for a given fold. This
183
- method is faster than calling `training_XTX` and `training_XTY` separately.
192
+ Computes the training set
193
+ :math:`\mathbf{X}^{\mathbf{T}}\mathbf{W}\mathbf{X}`,
194
+ :math:`\mathbf{X}^{\mathbf{T}}\mathbf{W}\mathbf{Y}`, and weighted
195
+ statistics for a given fold. This method is faster than calling
196
+ `training_XTX` and `training_XTY` separately.
184
197
  """
185
198
  return self._training_matrices(True, False, fold)
186
199
 
187
- def training_XTY(self, fold: Hashable) -> np.ndarray:
200
+ def training_XTY(self, fold: Hashable) -> Tuple[
201
+ np.ndarray,
202
+ Tuple[
203
+ Union[None, np.ndarray],
204
+ Union[None, np.ndarray],
205
+ Union[None, np.ndarray],
206
+ Union[None, np.ndarray],
207
+ ],
208
+ ]:
188
209
  """
189
- Returns the training set :math:`\mathbf{X}^{\mathbf{T}}\mathbf{Y}`
190
- corresponding to every sample except those belonging to the given fold.
210
+ Computes the training set :math:`\mathbf{X}^{\mathbf{T}}\mathbf{W}\mathbf{Y}`
211
+ corresponding to every sample except those belonging to the given fold. Also
212
+ computes the row of column-wise weighted means for `X`, the row of column-wise
213
+ weighted standard deviations for `X`, the row of column-wise weighted means for
214
+ `Y`, and the row of column-wise weighted standard deviations for `Y`. If a
215
+ statistic is not computed, it is `None`.
191
216
 
192
217
  Parameters
193
218
  ----------
@@ -197,8 +222,13 @@ class CVMatrix:
197
222
 
198
223
  Returns
199
224
  -------
200
- Array of shape (K, M)
201
- The training set :math:`\mathbf{X}^{\mathbf{T}}\mathbf{W}\mathbf{Y}`.
225
+ Tuple of two elements. The first element is an array of shape (K, M)
226
+ corresponding to the training set
227
+ :math:`\mathbf{X}^{\mathbf{T}}\mathbf{W}\mathbf{Y}`. The second element
228
+ is a tuple containing the row of column-wise weighted means for `X`, the
229
+ row of column-wise weighted standard deviations for `X`, the row of
230
+ column-wise weighted means for `Y`, and the row of column-wise weighted
231
+ standard deviations for `Y`. If a statistic is not computed, it is `None`.
202
232
 
203
233
  Raises
204
234
  ------
@@ -212,20 +242,36 @@ class CVMatrix:
212
242
  See Also
213
243
  --------
214
244
  training_XTX :
215
- Returns the training set :math:`\mathbf{X}^{\mathbf{T}}\mathbf{X}`
245
+ Computes the training set
246
+ :math:`\mathbf{X}^{\mathbf{T}}\mathbf{W}\mathbf{X}` and weighted statistics
247
+ for a given fold.
216
248
  training_XTX_XTY :
217
- Returns the training set
218
- :math:`\mathbf{X}^{\mathbf{T}}\mathbf{W}\mathbf{X}` and
219
- :math:`\mathbf{X}^{\mathbf{T}}\mathbf{W}\mathbf{Y}` for a given fold. This
220
- method is faster than calling `training_XTX` and `training_XTY` separately.
249
+ Computes the training set
250
+ :math:`\mathbf{X}^{\mathbf{T}}\mathbf{W}\mathbf{X}`,
251
+ :math:`\mathbf{X}^{\mathbf{T}}\mathbf{W}\mathbf{Y}`, and weighted
252
+ statistics for a given fold. This method is faster than calling
253
+ `training_XTX` and `training_XTY` separately.
221
254
  """
222
255
  return self._training_matrices(False, True, fold)
223
256
 
224
- def training_XTX_XTY(self, fold: Hashable) -> tuple[np.ndarray, np.ndarray]:
257
+ def training_XTX_XTY(self, fold: Hashable) -> Tuple[
258
+ Tuple[np.ndarray, np.ndarray],
259
+ Tuple[
260
+ Union[None, np.ndarray],
261
+ Union[None, np.ndarray],
262
+ Union[None, np.ndarray],
263
+ Union[None, np.ndarray],
264
+ ],
265
+ ]:
225
266
  """
226
- Returns the training set :math:`\mathbf{X}^{\mathbf{T}}\mathbf{W}\mathbf{X}`
267
+ Computes the training set :math:`\mathbf{X}^{\mathbf{T}}\mathbf{W}\mathbf{X}`
227
268
  and :math:`\mathbf{X}^{\mathbf{T}}\mathbf{W}\mathbf{Y}` corresponding to every
228
- sample except those belonging to the given fold.
269
+ sample except those belonging to the given fold. Also computes the row of
270
+ column-wise weighted means for `X`, the row of column-wise weighted standard
271
+ deviations for `X`, the row of column-wise weighted means for `Y`, and the row
272
+ of column-wise weighted standard deviations for `Y`. If a statistic is not
273
+ computed, it is `None`.
274
+
229
275
 
230
276
  Parameters
231
277
  ----------
@@ -236,9 +282,14 @@ class CVMatrix:
236
282
 
237
283
  Returns
238
284
  -------
239
- tuple of arrays of shapes (K, K) and (K, M)
240
- The training set :math:`\mathbf{X}^{\mathbf{T}}\mathbf{W}\mathbf{X}` and
241
- :math:`\mathbf{X}^{\mathbf{T}}\mathbf{W}\mathbf{Y}`.
285
+ Tuple of two tuples. The first tuple contains arrays of shapes (K, K) and
286
+ (K, M). These are the training set
287
+ :math:`\mathbf{X}^{\mathbf{T}}\mathbf{W}\mathbf{X}` and
288
+ :math:`\mathbf{X}^{\mathbf{T}}\mathbf{W}\mathbf{Y}`. The second tuple
289
+ contains the row of column-wise weighted means for `X`, the row of
290
+ column-wise weighted standard deviations for `X`, the row of column-wise
291
+ weighted means for `Y`, and the row of column-wise weighted standard
292
+ deviations for `Y`. If a statistic is not computed, it is `None`.
242
293
 
243
294
  Raises
244
295
  ------
@@ -252,121 +303,204 @@ class CVMatrix:
252
303
  See Also
253
304
  --------
254
305
  training_XTX :
255
- Returns the training set
256
- :math:`\mathbf{X}^{\mathbf{T}}\mathbf{W}\mathbf{X}`
306
+ Computes the training set
307
+ :math:`\mathbf{X}^{\mathbf{T}}\mathbf{W}\mathbf{X}` and weighted
308
+ statistics.
257
309
  training_XTY :
258
- Returns the training set
259
- :math:`\mathbf{X}^{\mathbf{T}}\mathbf{W}\mathbf{Y}`
310
+ Computes the training set
311
+ :math:`\mathbf{X}^{\mathbf{T}}\mathbf{W}\mathbf{Y}` and weighted
312
+ statistics.
260
313
  """
261
314
  return self._training_matrices(True, True, fold)
262
315
 
263
- def _training_matrices(
264
- self, return_XTX: bool, return_XTY: bool, fold: Hashable
265
- ) -> Union[np.ndarray, tuple[np.ndarray, np.ndarray]]:
316
+ def training_statistics(self, fold: Hashable) -> Tuple[
317
+ Union[None, np.ndarray],
318
+ Union[None, np.ndarray],
319
+ Union[None, np.ndarray],
320
+ Union[None, np.ndarray],
321
+ ]:
266
322
  """
267
- Returns the training set :math:`\mathbf{X}^{\mathbf{T}}\mathbf{W}\mathbf{X}`
268
- and/or :math:`\mathbf{X}^{\mathbf{T}}\mathbf{W}\mathbf{Y}` corresponding to
269
- every sample except those belonging to the given fold.
323
+ Computes the row of column-wise weighted means and standard deviations for `X`
324
+ and `Y` corresponding to every sample except those belonging to the given fold.
325
+ The statistics that can be computed depend on the arguments provided in the
326
+ constructor: `X` mean can be computed if `center_X`, `scale_X`, or `center_Y`
327
+ is `True`. `X` standard deviation can be computed if `scale_X` is `True`. `Y`
328
+ mean can be computed if `center_X`, `center_Y`, or `scale_Y` is `True`, and `Y`
329
+ is provided. `Y` standard deviation can be computed if `scale_Y` is `True` and
330
+ `Y` is provided.
270
331
 
271
332
  Parameters
272
333
  ----------
273
- return_XTX : bool
274
- Whether to return the training set
275
- :math:`\mathbf{X}^{\mathbf{T}}\mathbf{W}\mathbf{X}`.
276
-
277
334
  fold : Hashable
278
- The fold for which to return the corresponding training set
279
- :math:`\mathbf{X}^{\mathbf{T}}\mathbf{W}\mathbf{X}` and
280
- :math:`\mathbf{X}^{\mathbf{T}}\mathbf{W}\mathbf{Y}`
281
-
282
- return_XTY : bool, optional, default=False
283
- Whether to return the training set
284
- :math:`\mathbf{X}^{\mathbf{T}}\mathbf{W}\mathbf{Y}`.
335
+ The fold for which to return the corresponding training statistics.
285
336
 
286
337
  Returns
287
338
  -------
288
- Array of shape (K, K) or (K, M) or tuple of arrays of shapes (K, K) and (K, M)
289
- The training set :math:`\mathbf{X}^{\mathbf{T}}\mathbf{W}\mathbf{X}` and/or
290
- training set :math:`\mathbf{X}^{\mathbf{T}}\mathbf{W}\mathbf{Y}`.
339
+ Tuple of four elements of Union[None, np.ndarray]
340
+ A tuple containing the row of column-wise weighted means for `X`, the row
341
+ of column-wise weighted standard deviations for `X`, the row of column-wise
342
+ weighted means for `Y`, and the row of column-wise weighted standard
343
+ deviations for `Y`. If a statistic is not computed, it is `None`.
291
344
 
292
345
  Raises
293
346
  ------
294
- ValueError
295
- If both `return_XTX` and `return_XTY` are `False` or if `return_XTY` is
296
- `True` and `Y` is `None`.
297
-
298
347
  ValueError
299
348
  If `fold` was not provided as a cross-validation split in the
300
349
  `folds` parameter of the constructor.
301
350
  """
302
- if not return_XTX and not return_XTY:
351
+ val_indices = self._get_val_indices(fold)
352
+ X_val, X_val_unweighted, Y_val, Y_val_unweighted = self._get_val_matrices(
353
+ val_indices=val_indices, return_XTY=self.Y_total is not None
354
+ )
355
+ return self._compute_training_stats(
356
+ val_indices=val_indices,
357
+ X_val=X_val,
358
+ X_val_unweighted=X_val_unweighted,
359
+ Y_val=Y_val,
360
+ Y_val_unweighted=Y_val_unweighted,
361
+ return_X_mean=self.center_X or self.scale_X,
362
+ return_X_std=self.scale_X,
363
+ return_Y_mean=(self.center_Y or self.scale_Y) and self.Y_total is not None,
364
+ return_Y_std=self.scale_Y and self.Y_total is not None,
365
+ )[
366
+ :-1
367
+ ] # Exclude the sum of training weights from the return tuple
368
+
369
+ def _get_sum_w_train_and_num_nonzero_w_train(
370
+ self, val_indices: npt.NDArray[np.int_]
371
+ ) -> Tuple[float, float]:
372
+ """
373
+ Returns a tuple containing the sum of weights in the training set and the number
374
+ of non-zero weights in the training set. If `w_total` is `None`, it returns the
375
+ size of the training set as both the sum of weights and the number of
376
+ non-zero weights.
377
+
378
+ Returns
379
+ -------
380
+ Tuple of floats
381
+ The sum of weights in the training set and the number of non-zero weights in
382
+ the training set.
383
+
384
+ Raises
385
+ ------
386
+ ValueError
387
+ If the number of non-zero weights in the training set is zero, which would
388
+ make it impossible to compute either the training set means or the standard
389
+ deviations.
390
+ """
391
+ if self.w_total is None:
392
+ sum_w_val = np.asarray(val_indices.size, dtype=self.dtype)
393
+ sum_w_train = self.sum_w_total - sum_w_val
394
+ return (sum_w_train, sum_w_train)
395
+ w_val = self.w_total[val_indices]
396
+ sum_w_val = np.sum(w_val)
397
+ sum_w_train = self.sum_w_total - sum_w_val
398
+ num_nonzero_w_val = np.count_nonzero(w_val)
399
+ num_nonzero_w_train = np.asarray(
400
+ self.num_nonzero_w_total - num_nonzero_w_val, dtype=self.dtype
401
+ )
402
+ if num_nonzero_w_train == 0:
303
403
  raise ValueError(
304
- "At least one of `return_XTX` and `return_XTY` must be True."
404
+ "The number of non-zero weights in the training set must be "
405
+ "greater than zero."
305
406
  )
306
- if return_XTY and self.Y_total is None:
307
- raise ValueError("Response variables `Y` are not provided.")
308
- X_train_mean = None
309
- Y_train_mean = None
310
- X_train_std = None
311
- Y_train_std = None
312
- sum_w_train = None
313
- try:
314
- val_indices = self.folds_dict[fold]
315
- except KeyError as e:
316
- raise ValueError(f"Fold {fold} not found.") from e
317
- X_val = self.Xw_total[val_indices]
318
- if self.w_total is None:
319
- X_val_unweighted = X_val
320
- else:
321
- X_val_unweighted = self.X_total[val_indices]
322
- if return_XTY:
323
- if self.w_total is None or not (
324
- self.center_X or self.center_Y or self.scale_Y
325
- ):
326
- Y_val = self.Y_total[val_indices]
327
- Y_val_unweighted = Y_val
328
- else:
329
- Y_val = self.Yw_total[val_indices]
330
- Y_val_unweighted = self.Y_total[val_indices]
407
+ return sum_w_train, num_nonzero_w_train
408
+
409
+ def _compute_training_stats(
410
+ self,
411
+ val_indices: npt.NDArray[np.int_],
412
+ X_val: Union[None, np.ndarray],
413
+ X_val_unweighted: Union[None, np.ndarray],
414
+ Y_val: Union[None, np.ndarray],
415
+ Y_val_unweighted: Union[None, np.ndarray],
416
+ return_X_mean: bool,
417
+ return_X_std: bool,
418
+ return_Y_mean: bool,
419
+ return_Y_std: bool,
420
+ ) -> Tuple[
421
+ Union[None, np.ndarray],
422
+ Union[None, np.ndarray],
423
+ Union[None, np.ndarray],
424
+ Union[None, np.ndarray],
425
+ Union[None, float],
426
+ ]:
427
+ """
428
+ Computes the training set statistics for the given fold. The statistics include
429
+ the row of column-wise weighted means and standard deviations for `X` and `Y`.
430
+
431
+ Parameters
432
+ ----------
433
+ val_indices : Array of shape (N_val,)
434
+ The indices of the validation set samples for the given fold.
435
+
436
+ X_val : None or Array of shape (N_val, K)
437
+ The validation set of weighted predictor variables. If `None`, no
438
+ statistics for `X` are computed. Required if `return_X_mean` or
439
+ `return_X_std` is `True`.
440
+
441
+ X_val_unweighted : None or Array of shape (N_val, K)
442
+ The validation set of unweighted predictor variables. Required if
443
+ `return_X_std` is `True`.
444
+
445
+ Y_val : None or Array of shape (N_val, M)
446
+ The validation set of weighted response variables. If `None`, no statistics
447
+ for `Y` are computed. Required if `return_Y_mean` or `return_Y_std` is
448
+ `True`.
449
+
450
+ Y_val_unweighted : None or Array of shape (N_val, M)
451
+ The validation set of unweighted response variables. Required if
452
+ `return_Y_std` is `True`.
453
+
454
+ return_X_mean : bool
455
+ Whether to compute the row of column-wise weighted means for `X`.
456
+
457
+ return_X_std : bool
458
+ Whether to compute the row of column-wise weighted standard deviations for
459
+ `X`.
460
+
461
+ return_Y_mean : bool
462
+ Whether to compute the row of column-wise weighted means for `Y`.
463
+
464
+ return_Y_std : bool
465
+ Whether to compute the row of column-wise weighted standard deviations for
466
+ `Y`.
467
+
468
+ Returns
469
+ -------
470
+ Tuple of Union[None, np.ndarray]
471
+ A tuple containing the row of column-wise weighted means for `X`, the row
472
+ of column-wise weighted standard deviations for `X`, the row of column-wise
473
+ weighted means for `Y`, the row of column-wise weighted standard deviations
474
+ for `Y`, and the sum of training weights. If a statistic is not computed,
475
+ it is `None`.
476
+ """
331
477
  if (
332
- self.center_X
333
- or self.scale_X
334
- or (return_XTY and (self.center_Y or self.scale_Y))
478
+ not return_X_mean
479
+ and not return_X_std
480
+ and not return_Y_mean
481
+ and not return_Y_std
335
482
  ):
336
- if self.w_total is None:
337
- sum_w_val = np.asarray(val_indices.size, dtype=self.dtype)
338
- sum_w_train = self.sum_w_total - sum_w_val
339
- num_nonzero_w_train = sum_w_train
340
- else:
341
- w_val = self.w_total[val_indices]
342
- sum_w_val = np.sum(w_val)
343
- sum_w_train = self.sum_w_total - sum_w_val
344
- num_nonzero_w_val = np.count_nonzero(w_val)
345
- num_nonzero_w_train = np.asarray(
346
- self.num_nonzero_w_total - num_nonzero_w_val, dtype=self.dtype
347
- )
348
- if num_nonzero_w_train == 0:
349
- raise ValueError(
350
- "The number of non-zero weights in the training set must be "
351
- "greater than zero."
352
- )
353
- if self.center_X or self.scale_X or (return_XTY and self.center_Y):
483
+ return None, None, None, None, None
484
+ sum_w_train, num_nonzero_w_train = (
485
+ self._get_sum_w_train_and_num_nonzero_w_train(val_indices)
486
+ )
487
+ if return_X_mean or return_X_std:
354
488
  sum_X_val = np.sum(X_val, axis=0, keepdims=True)
355
489
  X_train_mean = self._compute_training_mat_mean(
356
490
  sum_X_val,
357
491
  self.sum_X_total,
358
492
  sum_w_train,
359
493
  )
360
- if return_XTY and (self.center_X or self.center_Y or self.scale_Y):
494
+ if return_Y_mean or return_Y_std:
361
495
  sum_Y_val = np.sum(Y_val, axis=0, keepdims=True)
362
496
  Y_train_mean = self._compute_training_mat_mean(
363
497
  sum_Y_val,
364
498
  self.sum_Y_total,
365
499
  sum_w_train,
366
500
  )
367
- if self.scale_X or (self.scale_Y and return_XTY):
501
+ if return_X_std or return_Y_std:
368
502
  divisor = self._compute_std_divisor(sum_w_train, num_nonzero_w_train)
369
- if self.scale_X:
503
+ if return_X_std:
370
504
  X_train_std = self._compute_training_mat_std(
371
505
  sum_X_val,
372
506
  X_val,
@@ -377,7 +511,7 @@ class CVMatrix:
377
511
  sum_w_train,
378
512
  divisor,
379
513
  )
380
- if self.scale_Y and return_XTY:
514
+ if return_Y_std:
381
515
  Y_train_std = self._compute_training_mat_std(
382
516
  sum_Y_val,
383
517
  Y_val,
@@ -388,7 +522,135 @@ class CVMatrix:
388
522
  sum_w_train,
389
523
  divisor,
390
524
  )
525
+ return (
526
+ X_train_mean if return_X_mean else None,
527
+ X_train_std if return_X_std else None,
528
+ Y_train_mean if return_Y_mean else None,
529
+ Y_train_std if return_Y_std else None,
530
+ sum_w_train,
531
+ )
532
+
533
+ def _training_matrices(
534
+ self, return_XTX: bool, return_XTY: bool, fold: Hashable
535
+ ) -> Tuple[
536
+ Union[np.ndarray, Tuple[np.ndarray, np.ndarray]],
537
+ Tuple[
538
+ Union[None, np.ndarray],
539
+ Union[None, np.ndarray],
540
+ Union[None, np.ndarray],
541
+ Union[None, np.ndarray],
542
+ ],
543
+ ]:
544
+ """
545
+ Returns the training set :math:`\mathbf{X}^{\mathbf{T}}\mathbf{W}\mathbf{X}`
546
+ and/or :math:`\mathbf{X}^{\mathbf{T}}\mathbf{W}\mathbf{Y}` corresponding to
547
+ every sample except those belonging to the given fold.
548
+
549
+ Parameters
550
+ ----------
551
+ return_XTX : bool
552
+ Whether to return the training set
553
+ :math:`\mathbf{X}^{\mathbf{T}}\mathbf{W}\mathbf{X}`.
554
+
555
+ fold : Hashable
556
+ The fold for which to return the corresponding training set
557
+ :math:`\mathbf{X}^{\mathbf{T}}\mathbf{W}\mathbf{X}` and
558
+ :math:`\mathbf{X}^{\mathbf{T}}\mathbf{W}\mathbf{Y}`
559
+
560
+ return_XTY : bool
561
+ Whether to return the training set
562
+ :math:`\mathbf{X}^{\mathbf{T}}\mathbf{W}\mathbf{Y}`.
563
+
564
+ Returns
565
+ -------
566
+ Tuple of two elements. The first element is an array of shape (K, K) or (K, M)
567
+ or a tuple of arrays of shapes (K, K) and (K, M). These are the training
568
+ set :math:`\mathbf{X}^{\mathbf{T}}\mathbf{W}\mathbf{X}` and/or
569
+ training set :math:`\mathbf{X}^{\mathbf{T}}\mathbf{W}\mathbf{Y}`. The
570
+ second element is a tuple containing the row of column-wise weighted means
571
+ for `X`, the row of column-wise weighted standard deviations for `X`, the
572
+ row of column-wise weighted means for `Y`, and the row of column-wise
573
+ weighted standard deviations for `Y`. If a statistic is not computed, it is
574
+ `None`.
575
+
576
+ Raises
577
+ ------
578
+ ValueError
579
+ If both `return_XTX` and `return_XTY` are `False` or if `return_XTY` is
580
+ `True` and `Y` is `None`.
581
+
582
+ ValueError
583
+ If `fold` was not provided as a cross-validation split in the
584
+ `folds` parameter of the constructor.
585
+ """
586
+ if not return_XTX and not return_XTY:
587
+ raise ValueError(
588
+ "At least one of `return_XTX` and `return_XTY` must be True."
589
+ )
590
+ if return_XTY and self.Y_total is None:
591
+ raise ValueError("Response variables `Y` are not provided.")
592
+ val_indices = self._get_val_indices(fold)
593
+ X_val, X_val_unweighted, Y_val, Y_val_unweighted = self._get_val_matrices(
594
+ val_indices=val_indices, return_XTY=return_XTY
595
+ )
596
+ X_train_mean, X_train_std, Y_train_mean, Y_train_std, sum_w_train = (
597
+ self._compute_training_stats(
598
+ val_indices=val_indices,
599
+ X_val=(
600
+ X_val
601
+ if self.center_X or self.scale_X or (return_XTY and self.center_Y)
602
+ else None
603
+ ),
604
+ X_val_unweighted=X_val_unweighted if self.scale_X else None,
605
+ Y_val=(
606
+ Y_val
607
+ if return_XTY and (self.center_X or self.center_Y or self.scale_Y)
608
+ else None
609
+ ),
610
+ Y_val_unweighted=(
611
+ Y_val_unweighted if return_XTY and self.scale_Y else None
612
+ ),
613
+ return_X_mean=self.center_X or (return_XTY and self.center_Y),
614
+ return_Y_mean=return_XTY and (self.center_X or self.center_Y),
615
+ return_X_std=self.scale_X,
616
+ return_Y_std=return_XTY and self.scale_Y,
617
+ )
618
+ )
619
+ stats_tuple = (
620
+ X_train_mean,
621
+ X_train_std,
622
+ Y_train_mean,
623
+ Y_train_std,
624
+ )
391
625
  if return_XTX and return_XTY:
626
+ return (
627
+ (
628
+ self._training_kernel_matrix(
629
+ self.XTX_total,
630
+ X_val,
631
+ X_val_unweighted,
632
+ X_train_mean,
633
+ X_train_mean,
634
+ X_train_std,
635
+ X_train_std,
636
+ sum_w_train,
637
+ center=self.center_X,
638
+ ),
639
+ self._training_kernel_matrix(
640
+ self.XTY_total,
641
+ X_val,
642
+ Y_val_unweighted,
643
+ X_train_mean,
644
+ Y_train_mean,
645
+ X_train_std,
646
+ Y_train_std,
647
+ sum_w_train,
648
+ center=self.center_X or self.center_Y,
649
+ ),
650
+ ),
651
+ stats_tuple,
652
+ )
653
+ if return_XTX:
392
654
  return (
393
655
  self._training_kernel_matrix(
394
656
  self.XTX_total,
@@ -401,42 +663,87 @@ class CVMatrix:
401
663
  sum_w_train,
402
664
  center=self.center_X,
403
665
  ),
404
- self._training_kernel_matrix(
405
- self.XTY_total,
406
- X_val,
407
- Y_val_unweighted,
408
- X_train_mean,
409
- Y_train_mean,
410
- X_train_std,
411
- Y_train_std,
412
- sum_w_train,
413
- center=self.center_X or self.center_Y,
414
- ),
666
+ stats_tuple,
415
667
  )
416
- if return_XTX:
417
- return self._training_kernel_matrix(
418
- self.XTX_total,
668
+ return (
669
+ self._training_kernel_matrix(
670
+ self.XTY_total,
419
671
  X_val,
420
- X_val_unweighted,
421
- X_train_mean,
672
+ Y_val_unweighted,
422
673
  X_train_mean,
674
+ Y_train_mean,
423
675
  X_train_std,
424
- X_train_std,
676
+ Y_train_std,
425
677
  sum_w_train,
426
- center=self.center_X,
427
- )
428
- return self._training_kernel_matrix(
429
- self.XTY_total,
430
- X_val,
431
- Y_val_unweighted,
432
- X_train_mean,
433
- Y_train_mean,
434
- X_train_std,
435
- Y_train_std,
436
- sum_w_train,
437
- center=self.center_X or self.center_Y,
678
+ center=self.center_X or self.center_Y,
679
+ ),
680
+ stats_tuple,
438
681
  )
439
682
 
683
+ def _get_val_indices(self, fold: Hashable) -> npt.NDArray[np.int_]:
684
+ """
685
+ Returns the indices of the validation set samples for a given fold.
686
+ Parameters
687
+ ----------
688
+ fold : Hashable
689
+ The fold for which to return the validation set indices.
690
+ Returns
691
+ -------
692
+ Array of shape (N_val,)
693
+ The indices of the validation set samples for the given fold.
694
+ """
695
+ try:
696
+ val_indices = self.folds_dict[fold]
697
+ except KeyError as e:
698
+ raise ValueError(f"Fold {fold} not found.") from e
699
+ return val_indices
700
+
701
+ def _get_val_matrices(
702
+ self, val_indices: npt.NDArray[np.int_], return_XTY: bool
703
+ ) -> Tuple[
704
+ np.ndarray,
705
+ np.ndarray,
706
+ np.ndarray,
707
+ Union[None, np.ndarray],
708
+ Union[None, np.ndarray],
709
+ ]:
710
+ """
711
+ Returns the validation set matrices for a given fold.
712
+ Parameters
713
+ ----------
714
+ val_indices : Array of shape (N_val,)
715
+ The indices of the validation set samples for the given fold.
716
+ return_XTY : bool
717
+ Whether to return the validation set of response variables `Y`. If `False`,
718
+ the returned `Y_val` and `Y_val_unweighted` will be `None`.
719
+ Returns
720
+ -------
721
+ Tuple of arrays of shapes (N_val, K), (N_val, K), (N_val, M), and (N_val, M)
722
+ The validation set of predictor variables `X`, the validation set of
723
+ unweighted predictor variables `X_unweighted`, the validation set of
724
+ response variables `Y`, and the validation set of unweighted response
725
+ variables `Y_unweighted`. If `return_XTY` is `False`, `Y` and
726
+ `Y_unweighted` will be `None`.
727
+ """
728
+ X_val = self.Xw_total[val_indices]
729
+ if self.w_total is None:
730
+ X_val_unweighted = X_val
731
+ else:
732
+ X_val_unweighted = self.X_total[val_indices]
733
+ if return_XTY:
734
+ if self.w_total is None or not (
735
+ self.center_X or self.center_Y or self.scale_Y
736
+ ):
737
+ Y_val = self.Y_total[val_indices]
738
+ Y_val_unweighted = Y_val
739
+ else:
740
+ Y_val = self.Yw_total[val_indices]
741
+ Y_val_unweighted = self.Y_total[val_indices]
742
+ else:
743
+ Y_val = None
744
+ Y_val_unweighted = None
745
+ return X_val, X_val_unweighted, Y_val, Y_val_unweighted
746
+
440
747
  def _training_kernel_matrix(
441
748
  self,
442
749
  total_kernel_mat: np.ndarray,
@@ -455,8 +762,8 @@ class CVMatrix:
455
762
  Parameters
456
763
  ----------
457
764
  total_kernel_mat : Array of shape (K, K) or (K, M)
458
- The total kernel matrix :math:`\mathbf{X}^{\mathbf{T}}\mathbf{X}` or
459
- :math:`\mathbf{X}^{\mathbf{T}}\mathbf{Y}`.
765
+ The total kernel matrix :math:`\mathbf{X}^{\mathbf{T}}\mathbf{W}\mathbf{X}`
766
+ or :math:`\mathbf{X}^{\mathbf{T}}\mathbf{W}\mathbf{Y}`.
460
767
 
461
768
  X_val : Array of shape (N_val, K)
462
769
  The validation set of predictor variables.
@@ -619,7 +926,7 @@ class CVMatrix:
619
926
  ) / divisor
620
927
  mat_train_var[mat_train_var < 0] = 0
621
928
  mat_train_std = np.sqrt(mat_train_var)
622
- mat_train_std[np.abs(mat_train_std) <= self.eps] = 1
929
+ mat_train_std[np.abs(mat_train_std) <= self.resolution] = 1
623
930
  return mat_train_std
624
931
 
625
932
  def _init_mat(self, mat: np.ndarray) -> np.ndarray:
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "cvmatrix"
3
- version = "2.0.2"
3
+ version = "2.1.0"
4
4
  description = "Fast computation of possibly weighted and possibly centered/scaled training set kernel matrices in a cross-validation setting."
5
5
  authors = ["Sm00thix <oleemail@icloud.com>"]
6
6
  maintainers = ["Sm00thix <oleemail@icloud.com>"]
@@ -1 +0,0 @@
1
- __version__ = "2.0.2"
File without changes
File without changes