umappp 0.1.6 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -16,6 +16,9 @@
16
16
  * @brief Implements the main user-visible class for running IRLBA.
17
17
  */
18
18
 
19
+ /**
20
+ * @brief Implements the IRLBA algorithm for approximate SVD.
21
+ */
19
22
  namespace irlba {
20
23
 
21
24
  /**
@@ -89,10 +92,9 @@ public:
89
92
  }
90
93
 
91
94
  /**
92
- * Set the maximum number of restart iterations.
93
- * In most cases, convergence will occur before reaching this limit.
95
+ * Set the seed for the creation of random vectors, primarily during initialization of the IRLBA algorithm.
94
96
  *
95
- * @param m Maximum number of iterations.
97
+ * @param s Seed value.
96
98
  *
97
99
  * @return A reference to the `Irlba` instance.
98
100
  */
@@ -155,7 +157,7 @@ public:
155
157
  * Run IRLBA on an input matrix to perform an approximate SVD, with arbitrary centering and scaling operations.
156
158
  *
157
159
  * @tparam M Matrix class, typically from the **Eigen** matrix manipulation library.
158
- * However, other classes are also supported, see the other `run()` methods for details.
160
+ * However, other classes are also supported, see `wrappers.hpp` for details.
159
161
  * @tparam Engine A (pseudo-)random number generator class, returning a randomly sampled value when called as a functor with no arguments.
160
162
  *
161
163
  * @param[in] mat Input matrix.
@@ -273,24 +275,7 @@ public:
273
275
  * and the second entry indicates the number of restart iterations performed.
274
276
  *
275
277
  * Custom classes can be used to define modified matrices that cannot be efficiently realized into the standard **Eigen** classes.
276
- * We expect:
277
- * - A `rows()` method that returns the number of rows.
278
- * - A `cols()` method that returns the number of columns.
279
- * - One of the following for matrix-vector multiplication:
280
- * - `multiply(rhs, out)`, which should compute the product of the matrix with `rhs`, a `Eigen::VectorXd`-equivalent of length equal to the number of columns;
281
- * and stores the result in `out`, an `Eigen::VectorXd` of length equal to the number of rows.
282
- * - A `*` method where the right-hand side is an `Eigen::VectorXd` (or equivalent expression) of length equal to the number of columsn,
283
- * and returns an `Eigen::VectorXd`-equivalent of length equal to the number of rows.
284
- * - One of the following for matrix transpose-vector multiplication:
285
- * - `adjoint_multiply(rhs, out)`, which should compute the product of the matrix transpose with `rhs`, a `Eigen::VectorXd`-equivalent of length equal to the number of rows;
286
- * and stores the result in `out`, an `Eigen::VectorXd` of length equal to the number of columns.
287
- * - An `adjoint()` method that returns an instance of any class that has a `*` method for matrix-vector multiplication.
288
- * The method should accept an `Eigen::VectorXd`-equivalent of length equal to the number of rows,
289
- * and return an `Eigen::VectorXd`-equvialent of length equal to the number of columns.
290
- * - A `realize()` method that returns an `Eigen::MatrixXd` object representing the modified matrix.
291
- * This can be omitted if an `Eigen::MatrixXd` can be copy-constructed from the class.
292
- *
293
- * See the `Centered` and `Scaled` classes for more details.
278
+ * See the `wrappers.hpp` file for more details, along with the `Centered` and `Scaled` classes.
294
279
  *
295
280
  * If the smallest dimension of `mat` is below 6, this method falls back to performing an exact SVD.
296
281
  */
@@ -485,7 +470,7 @@ private:
485
470
 
486
471
  public:
487
472
  /**
488
- * Result of the IRLBA-based decomposition.
473
+ * @brief Result of the IRLBA-based decomposition.
489
474
  */
490
475
  struct Results {
491
476
  /**
@@ -509,12 +494,12 @@ public:
509
494
  Eigen::VectorXd D;
510
495
 
511
496
  /**
512
- * Whether the algorithm converged.
497
+ * The number of restart iterations performed.
513
498
  */
514
499
  int iterations;
515
500
 
516
501
  /**
517
- * The number of restart iterations performed.
502
+ * Whether the algorithm converged.
518
503
  */
519
504
  bool converged;
520
505
  };
@@ -523,7 +508,7 @@ public:
523
508
  * Run IRLBA on an input matrix to perform an approximate SVD with centering and scaling.
524
509
  *
525
510
  * @tparam M Matrix class, most typically from the **Eigen** matrix manipulation library.
526
- * However, other classes are also supported, see the other `run()` methods for details.
511
+ * However, other classes are also supported, see `wrappers.hpp` for details.
527
512
  * @tparam Engine A (pseudo-)random number generator class, returning a randomly sampled value when called as a functor with no arguments.
528
513
  *
529
514
  * @param[in] mat Input matrix.
@@ -549,7 +534,7 @@ public:
549
534
  * Run IRLBA on an input matrix to perform an approximate SVD, see the `run()` method for more details.
550
535
  *
551
536
  * @tparam M Matrix class, most typically from the **Eigen** matrix manipulation library.
552
- * However, other classes are also supported, see the other `run()` methods for details.
537
+ * However, other classes are also supported, see `wrappers.hpp` for details.
553
538
  * @tparam Engine A (pseudo-)random number generator class, returning a randomly sampled value when called as a functor with no arguments.
554
539
  *
555
540
  * @param[in] mat Input matrix.
@@ -44,16 +44,22 @@ public:
44
44
 
45
45
  public:
46
46
  /**
47
- * @brief Intermediate data structures to avoid repeated allocations.
47
+ * @tparam M Some kind of matrix class, either from the **Eigen** library or one of **irlba**'s wrappers.
48
+ *
49
+ * @brief Intermediate data structures to avoid repeated allocations on `run()`.
48
50
  */
51
+ template<class M>
49
52
  struct Intermediates {
50
53
  /**
51
- * @tparam M Matrix class, most typically from the **Eigen** library.
52
- *
53
54
  * @param mat Instance of a matrix class `M`.
54
55
  */
55
- template<class M>
56
- Intermediates(const M& mat) : F(mat.cols()), W_next(mat.rows()), orthog_tmp(mat.cols()) {}
56
+ Intermediates(const M& mat) :
57
+ F(mat.cols()),
58
+ W_next(mat.rows()),
59
+ orthog_tmp(mat.cols()),
60
+ work(wrapped_workspace(&mat)),
61
+ awork(wrapped_adjoint_workspace(&mat))
62
+ {}
57
63
 
58
64
  /**
59
65
  * Obtain the residual vector, see algorithm 2.1 of Baglama and Reichel (2005).
@@ -70,13 +76,19 @@ public:
70
76
  Eigen::VectorXd F;
71
77
  Eigen::VectorXd W_next;
72
78
  Eigen::VectorXd orthog_tmp;
79
+ WrappedWorkspace<M> work;
80
+ WrappedAdjointWorkspace<M> awork;
73
81
  /**
74
82
  * @endcond
75
83
  */
76
84
  };
77
85
 
86
+ /**
87
+ * @tparam M Some matrix class, either from the **Eigen** library or one of **irlba**'s wrappers.
88
+ * @return An `Intermediates` object for subsequent calls to `run()` on `mat`.
89
+ */
78
90
  template<class M>
79
- Intermediates initialize(const M& mat) const {
91
+ Intermediates<M> initialize(const M& mat) const {
80
92
  return Intermediates(mat);
81
93
  }
82
94
 
@@ -92,20 +104,19 @@ public:
92
104
  * @tparam Engine A functor that, when called with no arguments, returns a random integer from a discrete uniform distribution.
93
105
  *
94
106
  * @param mat Input matrix.
95
- * @param W Output matrix with number of rows equal to `mat.rows()`.
107
+ * @param[in, out] W Output matrix with number of rows equal to `mat.rows()`.
96
108
  * The size of the working subspace is defined from the number of columns.
97
109
  * The first `start` columns should contain orthonormal column vectors with non-zero L2 norms.
98
- * @param V Matrix with number of rows equal to `mat.cols()` and number of columns equal to `W.cols()`.
110
+ * On output, the rest of `W` is filled with orthonormal vectors.
111
+ * @param[in, out] V Matrix with number of rows equal to `mat.cols()` and number of columns equal to `W.cols()`.
99
112
  * The first `start + 1` columns should contain orthonormal column vectors with non-zero L2 norms.
100
- * @param B Square matrix with number of rows and columns equal to the size of the working subspace.
113
+ * On output, the rest of `V` is filled with orthonormal vectors.
114
+ * @param[in, out] B Square matrix with number of rows and columns equal to the size of the working subspace.
101
115
  * Number of values is defined by `set_number()`.
116
+ * On output, `B` is filled with upper diagonal entries, starting from the `start`-th row/column.
102
117
  * @param eng An instance of a random number `Engine`.
103
118
  * @param inter Collection of intermediate data structures generated by calling `initialize()` on `mat`.
104
119
  * @param start The dimension from which to start the bidiagonalization.
105
- *
106
- * @return
107
- * `W` is filled with orthonormal vectors, as is `V`.
108
- * `B` is filled with upper diagonal entries.
109
120
  */
110
121
  template<class M, class Engine>
111
122
  void run(
@@ -114,7 +125,7 @@ public:
114
125
  Eigen::MatrixXd& V,
115
126
  Eigen::MatrixXd& B,
116
127
  Engine& eng,
117
- Intermediates& inter,
128
+ Intermediates<M>& inter,
118
129
  int start = 0)
119
130
  const {
120
131
  const double eps = (epsilon < 0 ? std::pow(std::numeric_limits<double>::epsilon(), 0.8) : epsilon);
@@ -125,31 +136,23 @@ public:
125
136
  auto& otmp = inter.orthog_tmp;
126
137
 
127
138
  F = V.col(start);
128
- if constexpr(has_multiply_method<M>::value) {
129
- W_next.noalias() = mat * F;
130
- } else {
131
- mat.multiply(F, W_next);
132
- }
139
+ wrapped_multiply(&mat, F, inter.work, W_next); // i.e., W_next = mat * F;
133
140
 
134
- // If start = 0, we assume that it's already normalized, see argument description for 'V'.
141
+ // If start = 0, there's nothing to orthogonalize against.
135
142
  if (start) {
136
143
  orthogonalize_vector(W, W_next, start, otmp);
137
144
  }
138
145
 
139
146
  double S = W_next.norm();
140
147
  if (S < eps) {
141
- throw -4;
148
+ throw std::runtime_error("starting vector near the null space of the input matrix");
142
149
  }
143
150
  W_next /= S;
144
151
  W.col(start) = W_next;
145
152
 
146
153
  // The Lanczos iterations themselves.
147
154
  for (int j = start; j < work; ++j) {
148
- if constexpr(has_adjoint_multiply_method<M>::value) {
149
- F.noalias() = mat.adjoint() * W.col(j);
150
- } else {
151
- mat.adjoint_multiply(W.col(j), F);
152
- }
155
+ wrapped_adjoint_multiply(&mat, W.col(j), inter.awork, F); // i.e., F = mat.adjoint() * W.col(j);
153
156
 
154
157
  F -= S * V.col(j); // equivalent to daxpy.
155
158
  orthogonalize_vector(V, F, j + 1, otmp);
@@ -172,11 +175,7 @@ public:
172
175
  B(j, j) = S;
173
176
  B(j, j + 1) = R_F;
174
177
 
175
- if constexpr(has_multiply_method<M>::value) {
176
- W_next.noalias() = mat * F;
177
- } else {
178
- mat.multiply(F, W_next);
179
- }
178
+ wrapped_multiply(&mat, F, inter.work, W_next); // i.e., W_next = mat * F;
180
179
 
181
180
  // Full re-orthogonalization, using the left-most 'j + 1' columns of W.
182
181
  // Recall that W_next will be the 'j + 2'-th column, i.e., W.col(j + 1) in
@@ -235,20 +235,6 @@ private:
235
235
  private:
236
236
  template<class Right>
237
237
  void indirect_multiply(const Right& rhs, Eigen::VectorXd& output) const {
238
- if constexpr(has_data_method<Right>::value) {
239
- // If it has a .data() method, the data values are already computed
240
- // and sitting in memory, so we just use that directly.
241
- indirect_multiply_internal(rhs, output);
242
- } else {
243
- // Otherwise, it is presumably an expression that involves some work
244
- // to get the values. We realize it into a VectorXd to ensure that
245
- // it is not repeatedly evaluated on each access to 'rhs'.
246
- indirect_multiply_internal(Eigen::VectorXd(rhs), output);
247
- }
248
- }
249
-
250
- template<class Right>
251
- void indirect_multiply_internal(const Right& rhs, Eigen::VectorXd& output) const {
252
238
  output.setZero();
253
239
 
254
240
  if (nthreads == 1) {
@@ -270,8 +256,8 @@ private:
270
256
  IRLBA_CUSTOM_PARALLEL(nthreads, [&](int t) -> void {
271
257
  #endif
272
258
 
273
- auto starts = secondary_nonzero_starts[t];
274
- auto ends = secondary_nonzero_starts[t + 1];
259
+ const auto& starts = secondary_nonzero_starts[t];
260
+ const auto& ends = secondary_nonzero_starts[t + 1];
275
261
  for (size_t c = 0; c < primary_dim; ++c) {
276
262
  auto start = starts[c];
277
263
  auto end = ends[c];
@@ -293,20 +279,6 @@ private:
293
279
  private:
294
280
  template<class Right>
295
281
  void direct_multiply(const Right& rhs, Eigen::VectorXd& output) const {
296
- if constexpr(has_data_method<Right>::value) {
297
- // If it has a .data() method, the data values are already computed
298
- // and sitting in memory, so we just use that directly.
299
- direct_multiply_internal(rhs, output);
300
- } else {
301
- // Otherwise, it is presumably an expression that involves some work
302
- // to get the values. We realize it into a VectorXd to ensure that
303
- // it is not repeatedly evaluated on each access to 'rhs'.
304
- direct_multiply_internal(Eigen::VectorXd(rhs), output);
305
- }
306
- }
307
-
308
- template<class Right>
309
- void direct_multiply_internal(const Right& rhs, Eigen::VectorXd& output) const {
310
282
  if (nthreads == 1) {
311
283
  for (size_t c = 0; c < primary_dim; ++c) {
312
284
  output.coeffRef(c) = column_dot_product(c, rhs);
@@ -346,18 +318,45 @@ private:
346
318
  return dot;
347
319
  }
348
320
 
321
+ public:
322
+ /**
323
+ * Workspace type for `multiply()`.
324
+ * Currently this is a placeholder.
325
+ */
326
+ typedef bool Workspace;
327
+
328
+ /**
329
+ * @return Workspace for use in `multiply()`.
330
+ */
331
+ bool workspace() const {
332
+ return false;
333
+ }
334
+
335
+ /**
336
+ * Workspace type for `adjoint_multiply()`.
337
+ * Currently this is a placeholder.
338
+ */
339
+ typedef bool AdjointWorkspace;
340
+
341
+ /**
342
+ * @return Workspace for use in `adjoint_multiply()`.
343
+ */
344
+ bool adjoint_workspace() const {
345
+ return false;
346
+ }
347
+
349
348
  public:
350
349
  /**
351
350
  * @tparam Right An `Eigen::VectorXd` or equivalent expression.
352
351
  *
353
352
  * @param[in] rhs The right-hand side of the matrix product.
354
353
  * This should be a vector or have only one column.
355
- * @param[out] out The output vector to store the matrix product.
356
- *
357
- * @return `out` is filled with the product of this matrix and `rhs`.
354
+ * @param work The return value of `workspace()`.
355
+ * @param[out] output The output vector to store the matrix product.
356
+ * This is filled with the product of this matrix and `rhs`.
358
357
  */
359
358
  template<class Right>
360
- void multiply(const Right& rhs, Eigen::VectorXd& output) const {
359
+ void multiply(const Right& rhs, Workspace& work, Eigen::VectorXd& output) const {
361
360
  if constexpr(column_major) {
362
361
  indirect_multiply(rhs, output);
363
362
  } else {
@@ -370,12 +369,12 @@ public:
370
369
  *
371
370
  * @param[in] rhs The right-hand side of the matrix product.
372
371
  * This should be a vector or have only one column.
373
- * @param[out] out The output vector to store the matrix product.
374
- *
375
- * @return `out` is filled with the product of the transpose of this matrix and `rhs`.
372
+ * @param work The return value of `adjoint_workspace()`.
373
+ * @param[out] output The output vector to store the matrix product.
374
+ * This is filled with the product of the transpose of this matrix and `rhs`.
376
375
  */
377
376
  template<class Right>
378
- void adjoint_multiply(const Right& rhs, Eigen::VectorXd& output) const {
377
+ void adjoint_multiply(const Right& rhs, AdjointWorkspace& work, Eigen::VectorXd& output) const {
379
378
  if constexpr(column_major) {
380
379
  direct_multiply(rhs, output);
381
380
  } else {
@@ -18,12 +18,11 @@ namespace irlba {
18
18
  * Orthogonalize a vector against a set of orthonormal column vectors.
19
19
  *
20
20
  * @param mat A matrix where the left-most `ncols` columns are orthonormal vectors.
21
- * @param vec The vector of interest, of length equal to the number of rows in `mat`.
21
+ * @param[in, out] vec The vector of interest, of length equal to the number of rows in `mat`.
22
+ * On output, this is modified to contain `vec - mat0 * t(mat0) * vec`, where `mat0` is defined as the first `ncols` columns of `mat`.
23
+ * This ensures that it is orthogonal to each column of `mat0`.
22
24
  * @param tmp A vector of length equal to `mat.cols()`, used to store intermediate matrix products.
23
25
  * @param ncols Number of left-most columns of `mat` to use.
24
- *
25
- * @return `vec` is modified to contain `vec - mat0 * t(mat0) * vec`, where `mat0` is defined as the first `ncols` columns of `mat`.
26
- * This ensures that it is orthogonal to each column of `mat0`.
27
26
  */
28
27
  inline void orthogonalize_vector(const Eigen::MatrixXd& mat, Eigen::VectorXd& vec, size_t ncols, Eigen::VectorXd& tmp) {
29
28
  tmp.head(ncols).noalias() = mat.leftCols(ncols).adjoint() * vec;
@@ -34,13 +33,12 @@ inline void orthogonalize_vector(const Eigen::MatrixXd& mat, Eigen::VectorXd& ve
34
33
  /**
35
34
  * Fill an **Eigen** vector with random normals via **aarand**.
36
35
  *
37
- * @param Vec Any **Eigen** vector class or equivalent proxy object.
38
- * @param Engine A (pseudo-)random number generator class that returns a random number when called with no arguments.
36
+ * @tparam Vec Any **Eigen** vector class or equivalent proxy object.
37
+ * @tparam Engine A (pseudo-)random number generator class that returns a random number when called with no arguments.
39
38
  *
40
- * @param vec Instance of a `Vec` class.
39
+ * @param[out] vec Instance of a `Vec` class.
40
+ * This is filled with random draws from a standard normal distribution.
41
41
  * @param eng Instance of an `Engine` class.
42
- *
43
- * @return `vec` is filled with random draws from a standard normal distribution.
44
42
  */
45
43
  template<class Vec, class Engine>
46
44
  void fill_with_random_normals(Vec& vec, Engine& eng) {
@@ -77,13 +75,13 @@ struct ColumnVectorProxy {
77
75
  /**
78
76
  * Fill a column of an **Eigen** matrix with random normals via **aarand**.
79
77
  *
80
- * @param Matrix Any **Eigen** matrix class or equivalent proxy object.
81
- * @param Engine A (pseudo-)random number generator class that returns a random number when called with no arguments.
78
+ * @tparam Matrix Any **Eigen** matrix class or equivalent proxy object.
79
+ * @tparam Engine A (pseudo-)random number generator class that returns a random number when called with no arguments.
82
80
  *
83
81
  * @param mat Instance of a `Matrix` class.
82
+ * The `column` column of this matrix is filled with random draws from a standard normal distribution.
83
+ * @param column Column of `mat` to be filled.
84
84
  * @param eng Instance of an `Engine` class.
85
- *
86
- * @return The `column` column of `mat` is filled with random draws from a standard normal distribution.
87
85
  */
88
86
  template<class Matrix, class Engine>
89
87
  void fill_with_random_normals(Matrix& mat, int column, Engine& eng) {
@@ -145,6 +143,7 @@ public:
145
143
  *
146
144
  * @param sv Vector of singular values.
147
145
  * @param residuals Vector of residuals for each singular value/vector.
146
+ * @param last Vector of singular values from the previous iteration.
148
147
  *
149
148
  * @return The number of singular values/vectors that have achieved convergence.
150
149
  */
@@ -205,16 +204,6 @@ template<class M>
205
204
  struct has_realize_method<M, decltype((void) std::declval<M>().realize(), 0)> {
206
205
  static constexpr bool value = std::is_same<decltype(std::declval<M>().realize()), Eigen::MatrixXd>::value;
207
206
  };
208
-
209
- template<class M, typename = int>
210
- struct has_data_method {
211
- static constexpr bool value = false;
212
- };
213
-
214
- template<class M>
215
- struct has_data_method<M, decltype((void) (std::declval<M>().data()), 0)> {
216
- static constexpr bool value = true;
217
- };
218
207
  /**
219
208
  * @endcond
220
209
  */