umappp 0.1.5 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -16,6 +16,9 @@
16
16
  * @brief Implements the main user-visible class for running IRLBA.
17
17
  */
18
18
 
19
+ /**
20
+ * @brief Implements the IRLBA algorithm for approximate SVD.
21
+ */
19
22
  namespace irlba {
20
23
 
21
24
  /**
@@ -89,10 +92,9 @@ public:
89
92
  }
90
93
 
91
94
  /**
92
- * Set the maximum number of restart iterations.
93
- * In most cases, convergence will occur before reaching this limit.
95
+ * Set the seed for the creation of random vectors, primarily during initialization of the IRLBA algorithm.
94
96
  *
95
- * @param m Maximum number of iterations.
97
+ * @param s Seed value.
96
98
  *
97
99
  * @return A reference to the `Irlba` instance.
98
100
  */
@@ -155,7 +157,7 @@ public:
155
157
  * Run IRLBA on an input matrix to perform an approximate SVD, with arbitrary centering and scaling operations.
156
158
  *
157
159
  * @tparam M Matrix class, typically from the **Eigen** matrix manipulation library.
158
- * However, other classes are also supported, see the other `run()` methods for details.
160
+ * However, other classes are also supported, see `wrappers.hpp` for details.
159
161
  * @tparam Engine A (pseudo-)random number generator class, returning a randomly sampled value when called as a functor with no arguments.
160
162
  *
161
163
  * @param[in] mat Input matrix.
@@ -273,24 +275,7 @@ public:
273
275
  * and the second entry indicates the number of restart iterations performed.
274
276
  *
275
277
  * Custom classes can be used to define modified matrices that cannot be efficiently realized into the standard **Eigen** classes.
276
- * We expect:
277
- * - A `rows()` method that returns the number of rows.
278
- * - A `cols()` method that returns the number of columns.
279
- * - One of the following for matrix-vector multiplication:
280
- * - `multiply(rhs, out)`, which should compute the product of the matrix with `rhs`, a `Eigen::VectorXd`-equivalent of length equal to the number of columns;
281
- * and stores the result in `out`, an `Eigen::VectorXd` of length equal to the number of rows.
282
- * - A `*` method where the right-hand side is an `Eigen::VectorXd` (or equivalent expression) of length equal to the number of columsn,
283
- * and returns an `Eigen::VectorXd`-equivalent of length equal to the number of rows.
284
- * - One of the following for matrix transpose-vector multiplication:
285
- * - `adjoint_multiply(rhs, out)`, which should compute the product of the matrix transpose with `rhs`, a `Eigen::VectorXd`-equivalent of length equal to the number of rows;
286
- * and stores the result in `out`, an `Eigen::VectorXd` of length equal to the number of columns.
287
- * - An `adjoint()` method that returns an instance of any class that has a `*` method for matrix-vector multiplication.
288
- * The method should accept an `Eigen::VectorXd`-equivalent of length equal to the number of rows,
289
- * and return an `Eigen::VectorXd`-equvialent of length equal to the number of columns.
290
- * - A `realize()` method that returns an `Eigen::MatrixXd` object representing the modified matrix.
291
- * This can be omitted if an `Eigen::MatrixXd` can be copy-constructed from the class.
292
- *
293
- * See the `Centered` and `Scaled` classes for more details.
278
+ * See the `wrappers.hpp` file for more details, along with the `Centered` and `Scaled` classes.
294
279
  *
295
280
  * If the smallest dimension of `mat` is below 6, this method falls back to performing an exact SVD.
296
281
  */
@@ -485,7 +470,7 @@ private:
485
470
 
486
471
  public:
487
472
  /**
488
- * Result of the IRLBA-based decomposition.
473
+ * @brief Result of the IRLBA-based decomposition.
489
474
  */
490
475
  struct Results {
491
476
  /**
@@ -509,12 +494,12 @@ public:
509
494
  Eigen::VectorXd D;
510
495
 
511
496
  /**
512
- * Whether the algorithm converged.
497
+ * The number of restart iterations performed.
513
498
  */
514
499
  int iterations;
515
500
 
516
501
  /**
517
- * The number of restart iterations performed.
502
+ * Whether the algorithm converged.
518
503
  */
519
504
  bool converged;
520
505
  };
@@ -523,7 +508,7 @@ public:
523
508
  * Run IRLBA on an input matrix to perform an approximate SVD with centering and scaling.
524
509
  *
525
510
  * @tparam M Matrix class, most typically from the **Eigen** matrix manipulation library.
526
- * However, other classes are also supported, see the other `run()` methods for details.
511
+ * However, other classes are also supported, see `wrappers.hpp` for details.
527
512
  * @tparam Engine A (pseudo-)random number generator class, returning a randomly sampled value when called as a functor with no arguments.
528
513
  *
529
514
  * @param[in] mat Input matrix.
@@ -549,7 +534,7 @@ public:
549
534
  * Run IRLBA on an input matrix to perform an approximate SVD, see the `run()` method for more details.
550
535
  *
551
536
  * @tparam M Matrix class, most typically from the **Eigen** matrix manipulation library.
552
- * However, other classes are also supported, see the other `run()` methods for details.
537
+ * However, other classes are also supported, see `wrappers.hpp` for details.
553
538
  * @tparam Engine A (pseudo-)random number generator class, returning a randomly sampled value when called as a functor with no arguments.
554
539
  *
555
540
  * @param[in] mat Input matrix.
@@ -44,16 +44,22 @@ public:
44
44
 
45
45
  public:
46
46
  /**
47
- * @brief Intermediate data structures to avoid repeated allocations.
47
+ * @tparam M Some kind of matrix class, either from the **Eigen** library or one of **irlba**'s wrappers.
48
+ *
49
+ * @brief Intermediate data structures to avoid repeated allocations on `run()`.
48
50
  */
51
+ template<class M>
49
52
  struct Intermediates {
50
53
  /**
51
- * @tparam M Matrix class, most typically from the **Eigen** library.
52
- *
53
54
  * @param mat Instance of a matrix class `M`.
54
55
  */
55
- template<class M>
56
- Intermediates(const M& mat) : F(mat.cols()), W_next(mat.rows()), orthog_tmp(mat.cols()) {}
56
+ Intermediates(const M& mat) :
57
+ F(mat.cols()),
58
+ W_next(mat.rows()),
59
+ orthog_tmp(mat.cols()),
60
+ work(wrapped_workspace(&mat)),
61
+ awork(wrapped_adjoint_workspace(&mat))
62
+ {}
57
63
 
58
64
  /**
59
65
  * Obtain the residual vector, see algorithm 2.1 of Baglama and Reichel (2005).
@@ -70,13 +76,19 @@ public:
70
76
  Eigen::VectorXd F;
71
77
  Eigen::VectorXd W_next;
72
78
  Eigen::VectorXd orthog_tmp;
79
+ WrappedWorkspace<M> work;
80
+ WrappedAdjointWorkspace<M> awork;
73
81
  /**
74
82
  * @endcond
75
83
  */
76
84
  };
77
85
 
86
+ /**
87
+ * @tparam M Some matrix class, either from the **Eigen** library or one of **irlba**'s wrappers.
88
+ * @return An `Intermediates` object for subsequent calls to `run()` on `mat`.
89
+ */
78
90
  template<class M>
79
- Intermediates initialize(const M& mat) const {
91
+ Intermediates<M> initialize(const M& mat) const {
80
92
  return Intermediates(mat);
81
93
  }
82
94
 
@@ -92,20 +104,19 @@ public:
92
104
  * @tparam Engine A functor that, when called with no arguments, returns a random integer from a discrete uniform distribution.
93
105
  *
94
106
  * @param mat Input matrix.
95
- * @param W Output matrix with number of rows equal to `mat.rows()`.
107
+ * @param[in, out] W Output matrix with number of rows equal to `mat.rows()`.
96
108
  * The size of the working subspace is defined from the number of columns.
97
109
  * The first `start` columns should contain orthonormal column vectors with non-zero L2 norms.
98
- * @param V Matrix with number of rows equal to `mat.cols()` and number of columns equal to `W.cols()`.
110
+ * On output, the rest of `W` is filled with orthonormal vectors.
111
+ * @param[in, out] V Matrix with number of rows equal to `mat.cols()` and number of columns equal to `W.cols()`.
99
112
  * The first `start + 1` columns should contain orthonormal column vectors with non-zero L2 norms.
100
- * @param B Square matrix with number of rows and columns equal to the size of the working subspace.
113
+ * On output, the rest of `V` is filled with orthonormal vectors.
114
+ * @param[in, out] B Square matrix with number of rows and columns equal to the size of the working subspace.
101
115
  * Number of values is defined by `set_number()`.
116
+ * On output, `B` is filled with upper diagonal entries, starting from the `start`-th row/column.
102
117
  * @param eng An instance of a random number `Engine`.
103
118
  * @param inter Collection of intermediate data structures generated by calling `initialize()` on `mat`.
104
119
  * @param start The dimension from which to start the bidiagonalization.
105
- *
106
- * @return
107
- * `W` is filled with orthonormal vectors, as is `V`.
108
- * `B` is filled with upper diagonal entries.
109
120
  */
110
121
  template<class M, class Engine>
111
122
  void run(
@@ -114,7 +125,7 @@ public:
114
125
  Eigen::MatrixXd& V,
115
126
  Eigen::MatrixXd& B,
116
127
  Engine& eng,
117
- Intermediates& inter,
128
+ Intermediates<M>& inter,
118
129
  int start = 0)
119
130
  const {
120
131
  const double eps = (epsilon < 0 ? std::pow(std::numeric_limits<double>::epsilon(), 0.8) : epsilon);
@@ -125,31 +136,23 @@ public:
125
136
  auto& otmp = inter.orthog_tmp;
126
137
 
127
138
  F = V.col(start);
128
- if constexpr(has_multiply_method<M>::value) {
129
- W_next.noalias() = mat * F;
130
- } else {
131
- mat.multiply(F, W_next);
132
- }
139
+ wrapped_multiply(&mat, F, inter.work, W_next); // i.e., W_next = mat * F;
133
140
 
134
- // If start = 0, we assume that it's already normalized, see argument description for 'V'.
141
+ // If start = 0, there's nothing to orthogonalize against.
135
142
  if (start) {
136
143
  orthogonalize_vector(W, W_next, start, otmp);
137
144
  }
138
145
 
139
146
  double S = W_next.norm();
140
147
  if (S < eps) {
141
- throw -4;
148
+ throw std::runtime_error("starting vector near the null space of the input matrix");
142
149
  }
143
150
  W_next /= S;
144
151
  W.col(start) = W_next;
145
152
 
146
153
  // The Lanczos iterations themselves.
147
154
  for (int j = start; j < work; ++j) {
148
- if constexpr(has_adjoint_multiply_method<M>::value) {
149
- F.noalias() = mat.adjoint() * W.col(j);
150
- } else {
151
- mat.adjoint_multiply(W.col(j), F);
152
- }
155
+ wrapped_adjoint_multiply(&mat, W.col(j), inter.awork, F); // i.e., F = mat.adjoint() * W.col(j);
153
156
 
154
157
  F -= S * V.col(j); // equivalent to daxpy.
155
158
  orthogonalize_vector(V, F, j + 1, otmp);
@@ -172,11 +175,7 @@ public:
172
175
  B(j, j) = S;
173
176
  B(j, j + 1) = R_F;
174
177
 
175
- if constexpr(has_multiply_method<M>::value) {
176
- W_next.noalias() = mat * F;
177
- } else {
178
- mat.multiply(F, W_next);
179
- }
178
+ wrapped_multiply(&mat, F, inter.work, W_next); // i.e., W_next = mat * F;
180
179
 
181
180
  // Full re-orthogonalization, using the left-most 'j + 1' columns of W.
182
181
  // Recall that W_next will be the 'j + 2'-th column, i.e., W.col(j + 1) in
@@ -235,20 +235,6 @@ private:
235
235
  private:
236
236
  template<class Right>
237
237
  void indirect_multiply(const Right& rhs, Eigen::VectorXd& output) const {
238
- if constexpr(has_data_method<Right>::value) {
239
- // If it has a .data() method, the data values are already computed
240
- // and sitting in memory, so we just use that directly.
241
- indirect_multiply_internal(rhs, output);
242
- } else {
243
- // Otherwise, it is presumably an expression that involves some work
244
- // to get the values. We realize it into a VectorXd to ensure that
245
- // it is not repeatedly evaluated on each access to 'rhs'.
246
- indirect_multiply_internal(Eigen::VectorXd(rhs), output);
247
- }
248
- }
249
-
250
- template<class Right>
251
- void indirect_multiply_internal(const Right& rhs, Eigen::VectorXd& output) const {
252
238
  output.setZero();
253
239
 
254
240
  if (nthreads == 1) {
@@ -270,8 +256,8 @@ private:
270
256
  IRLBA_CUSTOM_PARALLEL(nthreads, [&](int t) -> void {
271
257
  #endif
272
258
 
273
- auto starts = secondary_nonzero_starts[t];
274
- auto ends = secondary_nonzero_starts[t + 1];
259
+ const auto& starts = secondary_nonzero_starts[t];
260
+ const auto& ends = secondary_nonzero_starts[t + 1];
275
261
  for (size_t c = 0; c < primary_dim; ++c) {
276
262
  auto start = starts[c];
277
263
  auto end = ends[c];
@@ -293,20 +279,6 @@ private:
293
279
  private:
294
280
  template<class Right>
295
281
  void direct_multiply(const Right& rhs, Eigen::VectorXd& output) const {
296
- if constexpr(has_data_method<Right>::value) {
297
- // If it has a .data() method, the data values are already computed
298
- // and sitting in memory, so we just use that directly.
299
- direct_multiply_internal(rhs, output);
300
- } else {
301
- // Otherwise, it is presumably an expression that involves some work
302
- // to get the values. We realize it into a VectorXd to ensure that
303
- // it is not repeatedly evaluated on each access to 'rhs'.
304
- direct_multiply_internal(Eigen::VectorXd(rhs), output);
305
- }
306
- }
307
-
308
- template<class Right>
309
- void direct_multiply_internal(const Right& rhs, Eigen::VectorXd& output) const {
310
282
  if (nthreads == 1) {
311
283
  for (size_t c = 0; c < primary_dim; ++c) {
312
284
  output.coeffRef(c) = column_dot_product(c, rhs);
@@ -346,18 +318,45 @@ private:
346
318
  return dot;
347
319
  }
348
320
 
321
+ public:
322
+ /**
323
+ * Workspace type for `multiply()`.
324
+ * Currently this is a placeholder.
325
+ */
326
+ typedef bool Workspace;
327
+
328
+ /**
329
+ * @return Workspace for use in `multiply()`.
330
+ */
331
+ bool workspace() const {
332
+ return false;
333
+ }
334
+
335
+ /**
336
+ * Workspace type for `adjoint_multiply()`.
337
+ * Currently this is a placeholder.
338
+ */
339
+ typedef bool AdjointWorkspace;
340
+
341
+ /**
342
+ * @return Workspace for use in `adjoint_multiply()`.
343
+ */
344
+ bool adjoint_workspace() const {
345
+ return false;
346
+ }
347
+
349
348
  public:
350
349
  /**
351
350
  * @tparam Right An `Eigen::VectorXd` or equivalent expression.
352
351
  *
353
352
  * @param[in] rhs The right-hand side of the matrix product.
354
353
  * This should be a vector or have only one column.
355
- * @param[out] out The output vector to store the matrix product.
356
- *
357
- * @return `out` is filled with the product of this matrix and `rhs`.
354
+ * @param work The return value of `workspace()`.
355
+ * @param[out] output The output vector to store the matrix product.
356
+ * This is filled with the product of this matrix and `rhs`.
358
357
  */
359
358
  template<class Right>
360
- void multiply(const Right& rhs, Eigen::VectorXd& output) const {
359
+ void multiply(const Right& rhs, Workspace& work, Eigen::VectorXd& output) const {
361
360
  if constexpr(column_major) {
362
361
  indirect_multiply(rhs, output);
363
362
  } else {
@@ -370,12 +369,12 @@ public:
370
369
  *
371
370
  * @param[in] rhs The right-hand side of the matrix product.
372
371
  * This should be a vector or have only one column.
373
- * @param[out] out The output vector to store the matrix product.
374
- *
375
- * @return `out` is filled with the product of the transpose of this matrix and `rhs`.
372
+ * @param work The return value of `adjoint_workspace()`.
373
+ * @param[out] output The output vector to store the matrix product.
374
+ * This is filled with the product of the transpose of this matrix and `rhs`.
376
375
  */
377
376
  template<class Right>
378
- void adjoint_multiply(const Right& rhs, Eigen::VectorXd& output) const {
377
+ void adjoint_multiply(const Right& rhs, AdjointWorkspace& work, Eigen::VectorXd& output) const {
379
378
  if constexpr(column_major) {
380
379
  direct_multiply(rhs, output);
381
380
  } else {
@@ -18,12 +18,11 @@ namespace irlba {
18
18
  * Orthogonalize a vector against a set of orthonormal column vectors.
19
19
  *
20
20
  * @param mat A matrix where the left-most `ncols` columns are orthonormal vectors.
21
- * @param vec The vector of interest, of length equal to the number of rows in `mat`.
21
+ * @param[in, out] vec The vector of interest, of length equal to the number of rows in `mat`.
22
+ * On output, this is modified to contain `vec - mat0 * t(mat0) * vec`, where `mat0` is defined as the first `ncols` columns of `mat`.
23
+ * This ensures that it is orthogonal to each column of `mat0`.
22
24
  * @param tmp A vector of length equal to `mat.cols()`, used to store intermediate matrix products.
23
25
  * @param ncols Number of left-most columns of `mat` to use.
24
- *
25
- * @return `vec` is modified to contain `vec - mat0 * t(mat0) * vec`, where `mat0` is defined as the first `ncols` columns of `mat`.
26
- * This ensures that it is orthogonal to each column of `mat0`.
27
26
  */
28
27
  inline void orthogonalize_vector(const Eigen::MatrixXd& mat, Eigen::VectorXd& vec, size_t ncols, Eigen::VectorXd& tmp) {
29
28
  tmp.head(ncols).noalias() = mat.leftCols(ncols).adjoint() * vec;
@@ -34,13 +33,12 @@ inline void orthogonalize_vector(const Eigen::MatrixXd& mat, Eigen::VectorXd& ve
34
33
  /**
35
34
  * Fill an **Eigen** vector with random normals via **aarand**.
36
35
  *
37
- * @param Vec Any **Eigen** vector class or equivalent proxy object.
38
- * @param Engine A (pseudo-)random number generator class that returns a random number when called with no arguments.
36
+ * @tparam Vec Any **Eigen** vector class or equivalent proxy object.
37
+ * @tparam Engine A (pseudo-)random number generator class that returns a random number when called with no arguments.
39
38
  *
40
- * @param vec Instance of a `Vec` class.
39
+ * @param[out] vec Instance of a `Vec` class.
40
+ * This is filled with random draws from a standard normal distribution.
41
41
  * @param eng Instance of an `Engine` class.
42
- *
43
- * @return `vec` is filled with random draws from a standard normal distribution.
44
42
  */
45
43
  template<class Vec, class Engine>
46
44
  void fill_with_random_normals(Vec& vec, Engine& eng) {
@@ -77,13 +75,13 @@ struct ColumnVectorProxy {
77
75
  /**
78
76
  * Fill a column of an **Eigen** matrix with random normals via **aarand**.
79
77
  *
80
- * @param Matrix Any **Eigen** matrix class or equivalent proxy object.
81
- * @param Engine A (pseudo-)random number generator class that returns a random number when called with no arguments.
78
+ * @tparam Matrix Any **Eigen** matrix class or equivalent proxy object.
79
+ * @tparam Engine A (pseudo-)random number generator class that returns a random number when called with no arguments.
82
80
  *
83
81
  * @param mat Instance of a `Matrix` class.
82
+ * The `column` column of this matrix is filled with random draws from a standard normal distribution.
83
+ * @param column Column of `mat` to be filled.
84
84
  * @param eng Instance of an `Engine` class.
85
- *
86
- * @return The `column` column of `mat` is filled with random draws from a standard normal distribution.
87
85
  */
88
86
  template<class Matrix, class Engine>
89
87
  void fill_with_random_normals(Matrix& mat, int column, Engine& eng) {
@@ -145,6 +143,7 @@ public:
145
143
  *
146
144
  * @param sv Vector of singular values.
147
145
  * @param residuals Vector of residuals for each singular value/vector.
146
+ * @param last Vector of singular values from the previous iteration.
148
147
  *
149
148
  * @return The number of singular values/vectors that have achieved convergence.
150
149
  */
@@ -205,16 +204,6 @@ template<class M>
205
204
  struct has_realize_method<M, decltype((void) std::declval<M>().realize(), 0)> {
206
205
  static constexpr bool value = std::is_same<decltype(std::declval<M>().realize()), Eigen::MatrixXd>::value;
207
206
  };
208
-
209
- template<class M, typename = int>
210
- struct has_data_method {
211
- static constexpr bool value = false;
212
- };
213
-
214
- template<class M>
215
- struct has_data_method<M, decltype((void) (std::declval<M>().data()), 0)> {
216
- static constexpr bool value = true;
217
- };
218
207
  /**
219
208
  * @endcond
220
209
  */