nmatrix 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91) hide show
  1. data/.gitignore +27 -0
  2. data/.rspec +2 -0
  3. data/Gemfile +3 -5
  4. data/Guardfile +6 -0
  5. data/History.txt +33 -0
  6. data/Manifest.txt +41 -38
  7. data/README.rdoc +88 -11
  8. data/Rakefile +35 -53
  9. data/ext/nmatrix/data/complex.h +372 -0
  10. data/ext/nmatrix/data/data.cpp +275 -0
  11. data/ext/nmatrix/data/data.h +707 -0
  12. data/ext/nmatrix/data/rational.h +421 -0
  13. data/ext/nmatrix/data/ruby_object.h +446 -0
  14. data/ext/nmatrix/extconf.rb +101 -51
  15. data/ext/nmatrix/new_extconf.rb +56 -0
  16. data/ext/nmatrix/nmatrix.cpp +1609 -0
  17. data/ext/nmatrix/nmatrix.h +265 -849
  18. data/ext/nmatrix/ruby_constants.cpp +134 -0
  19. data/ext/nmatrix/ruby_constants.h +103 -0
  20. data/ext/nmatrix/storage/common.cpp +70 -0
  21. data/ext/nmatrix/storage/common.h +170 -0
  22. data/ext/nmatrix/storage/dense.cpp +665 -0
  23. data/ext/nmatrix/storage/dense.h +116 -0
  24. data/ext/nmatrix/storage/list.cpp +1088 -0
  25. data/ext/nmatrix/storage/list.h +129 -0
  26. data/ext/nmatrix/storage/storage.cpp +658 -0
  27. data/ext/nmatrix/storage/storage.h +99 -0
  28. data/ext/nmatrix/storage/yale.cpp +1601 -0
  29. data/ext/nmatrix/storage/yale.h +208 -0
  30. data/ext/nmatrix/ttable_helper.rb +126 -0
  31. data/ext/nmatrix/{yale/smmp1_header.template.c → types.h} +36 -9
  32. data/ext/nmatrix/util/io.cpp +295 -0
  33. data/ext/nmatrix/util/io.h +117 -0
  34. data/ext/nmatrix/util/lapack.h +1175 -0
  35. data/ext/nmatrix/util/math.cpp +557 -0
  36. data/ext/nmatrix/util/math.h +1363 -0
  37. data/ext/nmatrix/util/sl_list.cpp +475 -0
  38. data/ext/nmatrix/util/sl_list.h +255 -0
  39. data/ext/nmatrix/util/util.h +78 -0
  40. data/lib/nmatrix/blas.rb +70 -0
  41. data/lib/nmatrix/io/mat5_reader.rb +567 -0
  42. data/lib/nmatrix/io/mat_reader.rb +162 -0
  43. data/lib/{string.rb → nmatrix/monkeys.rb} +49 -2
  44. data/lib/nmatrix/nmatrix.rb +199 -0
  45. data/lib/nmatrix/nvector.rb +103 -0
  46. data/lib/nmatrix/version.rb +27 -0
  47. data/lib/nmatrix.rb +22 -230
  48. data/nmatrix.gemspec +59 -0
  49. data/scripts/mac-brew-gcc.sh +47 -0
  50. data/spec/4x4_sparse.mat +0 -0
  51. data/spec/4x5_dense.mat +0 -0
  52. data/spec/blas_spec.rb +47 -0
  53. data/spec/elementwise_spec.rb +164 -0
  54. data/spec/io_spec.rb +60 -0
  55. data/spec/lapack_spec.rb +52 -0
  56. data/spec/math_spec.rb +96 -0
  57. data/spec/nmatrix_spec.rb +93 -89
  58. data/spec/nmatrix_yale_spec.rb +52 -36
  59. data/spec/nvector_spec.rb +1 -1
  60. data/spec/slice_spec.rb +257 -0
  61. data/spec/spec_helper.rb +51 -0
  62. data/spec/utm5940.mtx +83844 -0
  63. metadata +113 -71
  64. data/.autotest +0 -23
  65. data/.gemtest +0 -0
  66. data/ext/nmatrix/cblas.c +0 -150
  67. data/ext/nmatrix/dense/blas_header.template.c +0 -52
  68. data/ext/nmatrix/dense/elementwise.template.c +0 -107
  69. data/ext/nmatrix/dense/gemm.template.c +0 -159
  70. data/ext/nmatrix/dense/gemv.template.c +0 -130
  71. data/ext/nmatrix/dense/rationalmath.template.c +0 -68
  72. data/ext/nmatrix/dense.c +0 -307
  73. data/ext/nmatrix/depend +0 -18
  74. data/ext/nmatrix/generator/syntax_tree.rb +0 -481
  75. data/ext/nmatrix/generator.rb +0 -594
  76. data/ext/nmatrix/list.c +0 -774
  77. data/ext/nmatrix/nmatrix.c +0 -1977
  78. data/ext/nmatrix/rational.c +0 -98
  79. data/ext/nmatrix/yale/complexmath.template.c +0 -71
  80. data/ext/nmatrix/yale/elementwise.template.c +0 -46
  81. data/ext/nmatrix/yale/elementwise_op.template.c +0 -73
  82. data/ext/nmatrix/yale/numbmm.template.c +0 -94
  83. data/ext/nmatrix/yale/smmp1.template.c +0 -21
  84. data/ext/nmatrix/yale/smmp2.template.c +0 -43
  85. data/ext/nmatrix/yale/smmp2_header.template.c +0 -46
  86. data/ext/nmatrix/yale/sort_columns.template.c +0 -56
  87. data/ext/nmatrix/yale/symbmm.template.c +0 -54
  88. data/ext/nmatrix/yale/transp.template.c +0 -68
  89. data/ext/nmatrix/yale.c +0 -726
  90. data/lib/array.rb +0 -67
  91. data/spec/syntax_tree_spec.rb +0 -46
@@ -0,0 +1,1175 @@
1
+ /////////////////////////////////////////////////////////////////////
2
+ // = NMatrix
3
+ //
4
+ // A linear algebra library for scientific computation in Ruby.
5
+ // NMatrix is part of SciRuby.
6
+ //
7
+ // NMatrix was originally inspired by and derived from NArray, by
8
+ // Masahiro Tanaka: http://narray.rubyforge.org
9
+ //
10
+ // == Copyright Information
11
+ //
12
+ // SciRuby is Copyright (c) 2010 - 2012, Ruby Science Foundation
13
+ // NMatrix is Copyright (c) 2012, Ruby Science Foundation
14
+ //
15
+ // Please see LICENSE.txt for additional copyright notices.
16
+ //
17
+ // == Contributing
18
+ //
19
+ // By contributing source code to SciRuby, you agree to be bound by
20
+ // our Contributor Agreement:
21
+ //
22
+ // * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement
23
+ //
24
+ // == lapack.h
25
+ //
26
+ // Templated versions of LAPACK functions, in C++.
27
+
28
+ #ifndef LAPACK_H
29
+ #define LAPACK_H
30
+
31
+ #include <cmath> // std::round
32
+
33
+ #include "math.h"
34
+
35
+ namespace nm { namespace math { namespace lapack {
36
+
37
+
38
+
39
+ /* -- LAPACK auxiliary routine (version 3.2) -- */
40
+ /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */
41
+ /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
42
+ /* November 2006 */
43
+
44
+ /* .. Scalar Arguments .. */
45
+
46
+ /* Purpose */
47
+ /* ======= */
48
+
49
+ /* This program sets problem and machine dependent parameters */
50
+ /* useful for xHSEQR and its subroutines. It is called whenever */
51
+ /* ILAENV is called with 12 <= ISPEC <= 16 */
52
+
53
+ /* Arguments */
54
+ /* ========= */
55
+
56
+ /* ISPEC (input) int scalar */
57
+ /* ISPEC specifies which tunable parameter IPARMQ should */
58
+ /* return. */
59
+
60
+ /* ISPEC=12: (INMIN) Matrices of order nmin or less */
61
+ /* are sent directly to xLAHQR, the implicit */
62
+ /* double shift QR algorithm. NMIN must be */
63
+ /* at least 11. */
64
+
65
+ /* ISPEC=13: (INWIN) Size of the deflation window. */
66
+ /* This is best set greater than or equal to */
67
+ /* the number of simultaneous shifts NS. */
68
+ /* Larger matrices benefit from larger deflation */
69
+ /* windows. */
70
+
71
+ /* ISPEC=14: (INIBL) Determines when to stop nibbling and */
72
+ /* invest in an (expensive) multi-shift QR sweep. */
73
+ /* If the aggressive early deflation subroutine */
74
+ /* finds LD converged eigenvalues from an order */
75
+ /* NW deflation window and LD.GT.(NW*NIBBLE)/100, */
76
+ /* then the next QR sweep is skipped and early */
77
+ /* deflation is applied immediately to the */
78
+ /* remaining active diagonal block. Setting */
79
+ /* IPARMQ(ISPEC=14) = 0 causes TTQRE to skip a */
80
+ /* multi-shift QR sweep whenever early deflation */
81
+ /* finds a converged eigenvalue. Setting */
82
+ /* IPARMQ(ISPEC=14) greater than or equal to 100 */
83
+ /* prevents TTQRE from skipping a multi-shift */
84
+ /* QR sweep. */
85
+
86
+ /* ISPEC=15: (NSHFTS) The number of simultaneous shifts in */
87
+ /* a multi-shift QR iteration. */
88
+
89
+ /* ISPEC=16: (IACC22) IPARMQ is set to 0, 1 or 2 with the */
90
+ /* following meanings. */
91
+ /* 0: During the multi-shift QR sweep, */
92
+ /* xLAQR5 does not accumulate reflections and */
93
+ /* does not use matrix-matrix multiply to */
94
+ /* update the far-from-diagonal matrix */
95
+ /* entries. */
96
+ /* 1: During the multi-shift QR sweep, */
97
+ /* xLAQR5 and/or xLAQRaccumulates reflections and uses */
98
+ /* matrix-matrix multiply to update the */
99
+ /* far-from-diagonal matrix entries. */
100
+ /* 2: During the multi-shift QR sweep. */
101
+ /* xLAQR5 accumulates reflections and takes */
102
+ /* advantage of 2-by-2 block structure during */
103
+ /* matrix-matrix multiplies. */
104
+ /* (If xTRMM is slower than xGEMM, then */
105
+ /* IPARMQ(ISPEC=16)=1 may be more efficient than */
106
+ /* IPARMQ(ISPEC=16)=2 despite the greater level of */
107
+ /* arithmetic work implied by the latter choice.) */
108
+
109
+ /* NAME (input) character string */
110
+ /* Name of the calling subroutine */
111
+
112
+ /* OPTS (input) character string */
113
+ /* This is a concatenation of the string arguments to */
114
+ /* TTQRE. */
115
+
116
+ /* N (input) int scalar */
117
+ /* N is the order of the Hessenberg matrix H. */
118
+
119
+ /* ILO (input) INTEGER */
120
+ /* IHI (input) INTEGER */
121
+ /* It is assumed that H is already upper triangular */
122
+ /* in rows and columns 1:ILO-1 and IHI+1:N. */
123
+
124
+ /* LWORK (input) int scalar */
125
+ /* The amount of workspace available. */
126
+
127
+ /* Further Details */
128
+ /* =============== */
129
+
130
+ /* Little is known about how best to choose these parameters. */
131
+ /* It is possible to use different values of the parameters */
132
+ /* for each of CHSEQR, DHSEQR, SHSEQR and ZHSEQR. */
133
+
134
+ /* It is probably best to choose different parameters for */
135
+ /* different matrices and different parameters at different */
136
+ /* times during the iteration, but this has not been */
137
+ /* implemented --- yet. */
138
+
139
+
140
+ /* The best choices of most of the parameters depend */
141
+ /* in an ill-understood way on the relative execution */
142
+ /* rate of xLAQR3 and xLAQR5 and on the nature of each */
143
+ /* particular eigenvalue problem. Experiment may be the */
144
+ /* only practical way to determine which choices are most */
145
+ /* effective. */
146
+
147
+ /* Following is a list of default values supplied by IPARMQ. */
148
+ /* These defaults may be adjusted in order to attain better */
149
+ /* performance in any particular computational environment. */
150
+
151
+ /* IPARMQ(ISPEC=12) The xLAHQR vs xLAQR0 crossover point. */
152
+ /* Default: 75. (Must be at least 11.) */
153
+
154
+ /* IPARMQ(ISPEC=13) Recommended deflation window size. */
155
+ /* This depends on ILO, IHI and NS, the */
156
+ /* number of simultaneous shifts returned */
157
+ /* by IPARMQ(ISPEC=15). The default for */
158
+ /* (IHI-ILO+1).LE.500 is NS. The default */
159
+ /* for (IHI-ILO+1).GT.500 is 3*NS/2. */
160
+
161
+ /* IPARMQ(ISPEC=14) Nibble crossover point. Default: 14. */
162
+
163
+ /* IPARMQ(ISPEC=15) Number of simultaneous shifts, NS. */
164
+ /* a multi-shift QR iteration. */
165
+
166
+ /* If IHI-ILO+1 is ... */
167
+
168
+ /* greater than ...but less ... the */
169
+ /* or equal to ... than default is */
170
+
171
+ /* 0 30 NS = 2+ */
172
+ /* 30 60 NS = 4+ */
173
+ /* 60 150 NS = 10 */
174
+ /* 150 590 NS = ** */
175
+ /* 590 3000 NS = 64 */
176
+ /* 3000 6000 NS = 128 */
177
+ /* 6000 infinity NS = 256 */
178
+
179
+ /* (+) By default matrices of this order are */
180
+ /* passed to the implicit double shift routine */
181
+ /* xLAHQR. See IPARMQ(ISPEC=12) above. These */
182
+ /* values of NS are used only in case of a rare */
183
+ /* xLAHQR failure. */
184
+
185
+ /* (**) The asterisks (**) indicate an ad-hoc */
186
+ /* function increasing from 10 to 64. */
187
+
188
+ /* IPARMQ(ISPEC=16) Select structured matrix multiply. */
189
+ /* (See ISPEC=16 above for details.) */
190
+ /* Default: 3. */
191
+
192
+ /* ================================================================ */
193
+ inline int iparmq(int ispec, int ilo, int ihi) {
194
+
195
+ const int INMIN = 12, INWIN = 13, INIBL = 14, ISHFTS = 15, IACC22 = 16;
196
+ const int NMIN = 75, K22MIN = 14, KACMIN = 14, NIBBLE = 14, KNWSWP = 500;
197
+
198
+ int ns = 2, nh = ihi - ilo + 1;
199
+
200
+ if (ispec == ISHFTS || ispec == INWIN|| ispec == IACC22) {
201
+
202
+ /* ==== Set the number of simultaneous shifts ==== */
203
+ if (nh >= 30) ns = 4;
204
+ if (nh >= 60) ns = 10;
205
+ if (nh >= 150) ns = std::max(10, (int)(nh / std::round(std::log((float) (ihi - ilo + 1)) / log(2.f)))); /* Computing MAX */
206
+ if (nh >= 590) ns = 64;
207
+ if (nh >= 3000) ns = 128;
208
+ if (nh >= 6000) ns = 256;
209
+ ns = std::max(2,ns - ns % 2); /* Computing MAX */
210
+ }
211
+
212
+ if (ispec == INMIN) {
213
+ /* ===== Matrices of order smaller than NMIN get sent */
214
+ /* . to xLAHQR, the classic double shift algorithm. */
215
+ /* . This must be at least 11. ==== */
216
+ return NMIN;
217
+
218
+ } else if (ispec == INIBL) {
219
+
220
+ /* ==== INIBL: skip a multi-shift qr iteration and */
221
+ /* . whenever aggressive early deflation finds */
222
+ /* . at least (NIBBLE*(window size)/100) deflations. ==== */
223
+
224
+ return NIBBLE;
225
+
226
+ } else if (ispec == ISHFTS) {
227
+
228
+ /* ==== NSHFTS: The number of simultaneous shifts ===== */
229
+ return ns;
230
+
231
+ } else if (ispec == INWIN) {
232
+
233
+ /* ==== NW: deflation window size. ==== */
234
+
235
+ if (nh <= KNWSWP) return ns;
236
+ else return ns * 3 / 2;
237
+
238
+ } else if (ispec == 16) {
239
+
240
+ /* ==== IACC22: Whether to accumulate reflections */
241
+ /* . before updating the far-from-diagonal elements */
242
+ /* . and whether to use 2-by-2 block structure while */
243
+ /* . doing it. A small amount of work could be saved */
244
+ /* . by making this choice dependent also upon the */
245
+ /* . NH=IHI-ILO+1. */
246
+
247
+ if (ns >= KACMIN) return 1;
248
+ if (ns >= K22MIN) return 2;
249
+
250
+ }
251
+
252
+ return -1;
253
+ } /* iparmq_ */
254
+
255
+
256
+
257
+
258
+ /* Purpose */
259
+ /* ======= */
260
+
261
+ /* DGER performs the rank 1 operation */
262
+
263
+ /* A := alpha*x*y**T + A, */
264
+
265
+ /* where alpha is a scalar, x is an m element vector, y is an n element */
266
+ /* vector and A is an m by n matrix. */
267
+
268
+ /* Arguments */
269
+ /* ========== */
270
+
271
+ /* M - INTEGER. */
272
+ /* On entry, M specifies the number of rows of the matrix A. */
273
+ /* M must be at least zero. */
274
+ /* Unchanged on exit. */
275
+
276
+ /* N - INTEGER. */
277
+ /* On entry, N specifies the number of columns of the matrix A. */
278
+ /* N must be at least zero. */
279
+ /* Unchanged on exit. */
280
+
281
+ /* ALPHA - DOUBLE PRECISION. */
282
+ /* On entry, ALPHA specifies the scalar alpha. */
283
+ /* Unchanged on exit. */
284
+
285
+ /* X - DOUBLE PRECISION array of dimension at least */
286
+ /* ( 1 + ( m - 1 )*abs( INCX ) ). */
287
+ /* Before entry, the incremented array X must contain the m */
288
+ /* element vector x. */
289
+ /* Unchanged on exit. */
290
+
291
+ /* INCX - INTEGER. */
292
+ /* On entry, INCX specifies the increment for the elements of */
293
+ /* X. INCX must not be zero. */
294
+ /* Unchanged on exit. */
295
+
296
+ /* Y - DOUBLE PRECISION array of dimension at least */
297
+ /* ( 1 + ( n - 1 )*abs( INCY ) ). */
298
+ /* Before entry, the incremented array Y must contain the n */
299
+ /* element vector y. */
300
+ /* Unchanged on exit. */
301
+
302
+ /* INCY - INTEGER. */
303
+ /* On entry, INCY specifies the increment for the elements of */
304
+ /* Y. INCY must not be zero. */
305
+ /* Unchanged on exit. */
306
+
307
+ /* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). */
308
+ /* Before entry, the leading m by n part of the array A must */
309
+ /* contain the matrix of coefficients. On exit, A is */
310
+ /* overwritten by the updated matrix. */
311
+
312
+ /* LDA - INTEGER. */
313
+ /* On entry, LDA specifies the first dimension of A as declared */
314
+ /* in the calling (sub) program. LDA must be at least */
315
+ /* max( 1, m ). */
316
+ /* Unchanged on exit. */
317
+
318
+ /* Further Details */
319
+ /* =============== */
320
+
321
+ /* Level 2 Blas routine. */
322
+
323
+ /* -- Written on 22-October-1986. */
324
+ /* Jack Dongarra, Argonne National Lab. */
325
+ /* Jeremy Du Croz, Nag Central Office. */
326
+ /* Sven Hammarling, Nag Central Office. */
327
+ /* Richard Hanson, Sandia National Labs. */
328
+
329
+ /* ===================================================================== */
330
+
331
+ template <typename DType>
332
+ inline int ger(int m, int n, DType alpha, DType* x, int incx, DType* y, int incy, DType* a, int lda) {
333
+
334
+ // FIXME: Call BLAS ger if available
335
+
336
+ if (m < 0) {
337
+ return 1;
338
+ } else if (n < 0) {
339
+ return 2;
340
+ } else if (incx == 0) {
341
+ return 5;
342
+ } else if (incy == 0) {
343
+ return 7;
344
+ } else if (lda < std::max(1,m)) {
345
+ return 9;
346
+ }
347
+
348
+ if (m == 0 || n == 0 || alpha == 0) return 0; /* Quick return if possible. */
349
+
350
+ /* Start the operations. In this version the elements of A are */
351
+ /* accessed sequentially with one pass through A. */
352
+
353
+ // FIXME: These have been unrolled in a way that the compiler can handle. Collapse into a single case, or optimize
354
+ // FIXME: in a more modern way.
355
+
356
+ int jy = incy > 0 ? 0 : -(n-1) * incy;
357
+
358
+ if (incx == 1) {
359
+
360
+ for (size_t j = 0; j < n; ++j, jy += incy) {
361
+ if (y[jy] != 0) {
362
+ DType temp = alpha * y[jy];
363
+ for (size_t i = 0; i < m; ++i) {
364
+ a[i + j * lda] += x[i] * temp;
365
+ }
366
+ }
367
+ }
368
+
369
+ } else {
370
+
371
+ int kx = incx > 0 ? 0 : -(m-1) * incx;
372
+
373
+ for (size_t j = 0; j < n; ++j, jy += incy) {
374
+ if (y[jy] != 0) {
375
+ DType temp = alpha * y[jy];
376
+
377
+ for (size_t i = 0, ix = kx; i < m; ++i, ix += incx) {
378
+ a[i + j * lda] += x[ix] * temp;
379
+ }
380
+ }
381
+ }
382
+
383
+ }
384
+
385
+ return 0;
386
+
387
+ /* End of DGER . */
388
+
389
+ } /* dger_ */
390
+
391
+
392
+ /* Purpose */
393
+ /* ======= */
394
+
395
+ /* DSCAL scales a vector by a constant. */
396
+ /* uses unrolled loops for increment equal to one. */
397
+
398
+ /* Further Details */
399
+ /* =============== */
400
+
401
+ /* jack dongarra, linpack, 3/11/78. */
402
+ /* modified 3/93 to return if incx .le. 0. */
403
+ /* modified 12/3/93, array(1) declarations changed to array(*) */
404
+
405
+ /* ===================================================================== */
406
+
407
+ template <typename DType>
408
+ inline void scal(const int n, const DType da, DType* dx, const int incx) {
409
+
410
+ // This used to have unrolled loops, like dswap. They were in the way.
411
+
412
+ if (n <= 0 || incx <= 0) return;
413
+
414
+ for (int i = 0; incx < 0 ? i > n*incx : i < n*incx; i += incx) {
415
+ dx[i] = da * dx[i];
416
+ }
417
+ } /* scal */
418
+
419
+
420
+ /* Purpose */
421
+ /* ======= */
422
+
423
+ /* interchanges two vectors. */
424
+ /* uses unrolled loops for increments equal one. */
425
+
426
+ /* Further Details */
427
+ /* =============== */
428
+
429
+ /* jack dongarra, linpack, 3/11/78. */
430
+ /* modified 12/3/93, array(1) declarations changed to array(*) */
431
+
432
+ /* ===================================================================== */
433
+ // Formerly dswap
434
+ template <typename DType>
435
+ inline void swap(int n, DType *dx, int incx, DType *dy, int incy) {
436
+
437
+ /* Function Body */
438
+ if (n <= 0) return;
439
+
440
+ /*
441
+ * The NETLIB version of dswap has loops manually unrolled, per commented code below.
442
+ * This doesn't make sense with modern compilers, which know much more about arch-
443
+ * itectures than we do. Combine that with our use of templates, and it's much more
444
+ * efficient to let the compiler do the unrolling in most cases.
445
+ */
446
+ /*
447
+ if (incx == 1 && incy == 1) { // if both increments are 1
448
+
449
+
450
+ m = n % 3;
451
+ if (m) {
452
+ for (size_t i = 0; i < m; ++i) { // If number is not divisible by three, swap just one or two singly.
453
+ dtemp = dx[i];
454
+ dx[i] = dy[i];
455
+ dy[i] = dtemp;
456
+ }
457
+ if (n < 3) return;
458
+ }
459
+
460
+ for (i = m; i < n; i += 3) { // Why does it swap three at a time? -- John 8/27/12
461
+ DType dtemp = dx[i];
462
+ dx[i] = dy[i];
463
+ dy[i] = dtemp;
464
+
465
+ dtemp = dx[i + 1];
466
+ dx[i + 1] = dy[i + 1];
467
+ dy[i + 1] = dtemp;
468
+
469
+ dtemp = dx[i + 2];
470
+ dx[i + 2] = dy[i + 2];
471
+ dy[i + 2] = dtemp;
472
+ }
473
+
474
+ } else { // when any increment is not 1
475
+ */
476
+
477
+ // For negative increments, start at the end of the array.
478
+ int ix = incx < 0 ? (-n+1)*incx : 0,
479
+ iy = incy < 0 ? (-n+1)*incy : 0;
480
+
481
+ if (incx < 0) ix = (-n + 1) * incx;
482
+ if (incy < 0) iy = (-n + 1) * incy;
483
+
484
+ for (size_t i = 0; i < n; ++i, ix += incx, iy += incy) {
485
+ DType dtemp = dx[ix];
486
+ dx[ix] = dy[iy];
487
+ dy[iy] = dtemp;
488
+ }
489
+ /*} */
490
+ return;
491
+ } /* dswap */
492
+
493
+
494
+
495
+
496
+ /* Purpose */
497
+ /* ======= */
498
+
499
+ /* IDAMAX finds the index of element having max. absolute value. */
500
+
501
+ /* Further Details */
502
+ /* =============== */
503
+
504
+ /* jack dongarra, linpack, 3/11/78. */
505
+ /* modified 3/93 to return if incx .le. 0. */
506
+ /* modified 12/3/93, array(1) declarations changed to array(*) */
507
+
508
+ /* ===================================================================== */
509
+
510
+ template <typename DType>
511
+ inline int idamax(size_t n, DType *dx, int incx) {
512
+
513
+ /* Function Body */
514
+ if (n < 1 || incx <= 0) return -1;
515
+ if (n == 1) return 0;
516
+
517
+ DType dmax;
518
+ size_t imax = 0;
519
+
520
+ if (incx == 1) { // if incrementing by 1
521
+
522
+ dmax = abs(dx[0]);
523
+
524
+ for (size_t i = 1; i < n; ++i) {
525
+ if (std::abs(dx[i]) > dmax) {
526
+ imax = i;
527
+ dmax = std::abs(dx[i]);
528
+ }
529
+ }
530
+
531
+ } else { // if incrementing by more than 1
532
+
533
+ dmax = std::abs(dx[0]);
534
+
535
+ for (size_t i = 1, ix = incx; i < n; ++i, ix += incx) {
536
+ if (std::abs(dx[ix]) > dmax) {
537
+ imax = i;
538
+ dmax = std::abs(dx[ix]);
539
+ }
540
+ }
541
+ }
542
+ return imax;
543
+ } /* idamax_ */
544
+
545
+
546
+
547
+
548
+
549
+
550
+ /* -- LAPACK auxiliary routine (version 3.3.1) -- */
551
+ /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */
552
+ /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
553
+ /* -- April 2011 -- */
554
+
555
+ /* .. Scalar Arguments .. */
556
+ /* .. */
557
+
558
+ /* Purpose */
559
+ /* ======= */
560
+
561
+ /* IEEECK is called from the ILAENV to verify that Infinity and */
562
+ /* possibly NaN arithmetic is safe (i.e. will not trap). */
563
+ // FIXME: Can we use std::numeric_limits::traps for this?
564
+
565
+ /* Arguments */
566
+ /* ========= */
567
+
568
+ /* ISPEC (input) INTEGER */
569
+ /* Specifies whether to test just for inifinity arithmetic */
570
+ /* or whether to test for infinity and NaN arithmetic. */
571
+ /* = 0: Verify infinity arithmetic only. */
572
+ /* = 1: Verify infinity and NaN arithmetic. */
573
+
574
+ /* ZERO (input) REAL */
575
+ /* Must contain the value 0.0 */
576
+ /* This is passed to prevent the compiler from optimizing */
577
+ /* away this code. */
578
+
579
+ /* ONE (input) REAL */
580
+ /* Must contain the value 1.0 */
581
+ /* This is passed to prevent the compiler from optimizing */
582
+ /* away this code. */
583
+
584
+ /* RETURN VALUE: INTEGER */
585
+ /* = 0: Arithmetic failed to produce the correct answers */
586
+ /* = 1: Arithmetic produced the correct answers */
587
+
588
+ /* ===================================================================== */
589
+
590
+ /*
591
+ * Note from John: This seems totally unnecessary in modern C++.
592
+ * FIXME: Remove this after testing that on modern systems this always returns 1.
593
+ */
594
+
595
+ inline int ieeeck(bool ispec) {
596
+
597
+ float posinf = 1.0 / 0.0;
598
+ if (posinf <= 1.0) return 0;
599
+
600
+ float neginf = -1.0 / 0.0;
601
+ if (neginf >= 0.0) return 0;
602
+
603
+ float negzro = 1.0 / (neginf + 1.0);
604
+ if (negzro != 0.0) return 0;
605
+
606
+ neginf = 1.0 / negzro;
607
+ if (neginf >= 0.0) return 0;
608
+
609
+ float newzro = negzro + 0.0;
610
+ if (newzro != 0.0) return 0;
611
+
612
+ posinf = 1.0 / newzro;
613
+ if (posinf <= 1.0) return 0;
614
+
615
+ neginf *= posinf;
616
+ if (neginf >= 0.0) return 0;
617
+
618
+ posinf *= posinf;
619
+ if (posinf <= 1.0) return 0;
620
+
621
+
622
+ /* Return if we were only asked to check infinity arithmetic */
623
+
624
+ if (!ispec) return 1;
625
+
626
+ float nan1 = posinf + neginf;
627
+ float nan2 = posinf / neginf;
628
+ float nan3 = posinf / posinf;
629
+ float nan4 = posinf * 0.0;
630
+ float nan5 = neginf * negzro;
631
+ float nan6 = nan5 * 0.0;
632
+
633
+ if (nan1 == nan1) return 0;
634
+ if (nan2 == nan2) return 0;
635
+ if (nan3 == nan3) return 0;
636
+ if (nan4 == nan4) return 0;
637
+ if (nan5 == nan5) return 0;
638
+ if (nan6 == nan6) return 0;
639
+
640
+ return 1;
641
+ } /* ieeeck_ */
642
+
643
+
644
+
645
+
646
+ inline int ilaenv_block_size(int n2, int n4, const std::string& c2, const std::string& c3, const std::string& c4, bool sname, bool cname) {
647
+ if (c2 == "GE") { //(s_cmp(c2, "GE", (size_t)2, (size_t)2) == 0) {
648
+ if (c3 == "TRF") { //if (s_cmp(c3, "TRF", (size_t)3, (size_t)3) == 0) {
649
+ if (sname) return 64;
650
+ else return 64;
651
+ } else if (c3 == "QRF" || c3 == "RQF" || c3 == "LQF" || c3 == "QLF") { //(s_cmp(c3, "QRF", (size_t)3, (size_t)3) == 0 || s_cmp(c3, "RQF", (size_t)3, (size_t)3) == 0 || s_cmp(c3, "LQF", (size_t) 3, (size_t)3) == 0 || s_cmp(c3, "QLF", (size_t)3, (size_t)3) == 0) {
652
+ if (sname) return 32;
653
+ else return 32;
654
+ } else if (c3 == "HRD") {
655
+ if (sname) return 32;
656
+ else return 32;
657
+ } else if (c3 == "BRD") {
658
+ if (sname) return 32;
659
+ else return 32;
660
+ } else if (c3 == "TRI") {
661
+ if (sname) return 64;
662
+ else return 64;
663
+ }
664
+ } else if (c2 == "PO") {
665
+ if (c3 == "TRF") {
666
+ if (sname) return 64;
667
+ else return 64;
668
+ }
669
+ } else if (c2 == "SY") {
670
+ if (c3 == "TRF") {
671
+ if (sname) return 64;
672
+ else return 64;
673
+ } else if (sname && c3 == "TRD") {
674
+ return 32;
675
+ } else if (sname && c3 == "GST") {
676
+ return 64;
677
+ }
678
+ } else if (cname && c2 == "HE") {
679
+ if (c3 == "TRF") return 64;
680
+ else if (c3 == "TRD") return 32;
681
+ else if (c3 == "GST") return 64;
682
+ } else if (sname && c2 == "OR") {
683
+ if (c3.at(0) == 'G') {
684
+ if (c4 == "QR" || c4 == "RQ" || c4 == "LQ" || c4 == "QL" || c4 == "HR" || c4 == "TR" || c4 == "BR") return 32;
685
+ } else if (c3.at(0) == 'M') {
686
+ if (c4 == "QR" || c4 == "RQ" || c4 == "LQ" || c4 == "QL" || c4 == "HR" || c4 == "TR" || c4 == "BR") return 32;
687
+ }
688
+ } else if (cname && c2 == "UN") {
689
+ if (c3.at(0) == 'G') {
690
+ if (c4 == "QR" || c4 == "RQ" || c4 == "LQ" || c4 == "QL" || c4 == "HR" || c4 == "TR" || c4 == "BR") return 32;
691
+ } else if (c3.at(0) == 'M') {
692
+ if (c4 == "QR" || c4 == "RQ" || c4 == "LQ" || c4 == "QL" || c4 == "HR" || c4 == "TR" || c4 == "BR") return 32;
693
+ }
694
+ } else if (c2 == "GB") {
695
+ if (c3 == "TRF") {
696
+ if (sname) {
697
+ if (n4 <= 64) return 1;
698
+ else return 32;
699
+ } else {
700
+ if (n4 <= 64) return 1;
701
+ else return 32;
702
+ }
703
+ }
704
+ } else if (c2 == "PB") {
705
+ if (c3 == "TRF") {
706
+ if (sname) {
707
+ if (n2 <= 64) return 1;
708
+ else return 32;
709
+ } else {
710
+ if (n2 <= 64) return 1;
711
+ else return 32;
712
+ }
713
+ }
714
+ } else if (c2 == "TR") {
715
+ if (c3 == "TRI") {
716
+ if (sname) return 64;
717
+ else return 64;
718
+ }
719
+ } else if (c2 == "LA") {
720
+ if (c3 == "UUM") {
721
+ if (sname) return 64;
722
+ else return 64;
723
+ }
724
+ } else if (sname && c2 == "ST") {
725
+ if (c3 == "EBZ") return 1;
726
+ }
727
+ return 1;
728
+ }
729
+
730
+
731
+ inline int ilaenv_min_block_size(const std::string& c2, const std::string& c3, const std::string& c4, bool sname, bool cname) {
732
+
733
+ if (c2 == "GE") {
734
+ if (c3 == "QRF" || c3 == "RQF" || c3 == "LQF" || c3 == "QLF") {
735
+ if (sname) {
736
+ return 2;
737
+ } else {
738
+ return 2;
739
+ }
740
+ } else if (c3 == "HRD") {
741
+ if (sname) {
742
+ return 2;
743
+ } else {
744
+ return 2;
745
+ }
746
+ } else if (c3 == "BRD") {
747
+ if (sname) {
748
+ return 2;
749
+ } else {
750
+ return 2;
751
+ }
752
+ } else if (c3 == "TRI") {
753
+ if (sname) {
754
+ return 2;
755
+ } else {
756
+ return 2;
757
+ }
758
+ }
759
+ } else if (c2 == "SY") {
760
+ if (c3 == "TRF") {
761
+ if (sname) {
762
+ return 8;
763
+ } else {
764
+ return 8;
765
+ }
766
+ } else if (sname && c3 == "TRD") {
767
+ return 2;
768
+ }
769
+ } else if (cname && c2 == "HE") {
770
+ if (c3 == "TRD") {
771
+ return 2;
772
+ }
773
+ } else if (sname && c2 == "OR") {
774
+ if (c3.at(0) == 'G') {
775
+ if (c4 == "QR" || c4 == "RQ" || c4 == "LQ" || c4 == "QL" || c4 == "HR" || c4 == "TR" || c4 == "BR") {
776
+ return 2;
777
+ }
778
+ } else if (c3.at(0) == 'M') {
779
+ if (c4 == "QR" || c4 == "RQ" || c4 == "LQ" || c4 == "QL" || c4 == "HR" || c4 == "TR" || c4 == "BR") {
780
+ return 2;
781
+ }
782
+ }
783
+ } else if (cname && c2 == "UN") {
784
+ if (c3.at(0) == 'G') {
785
+ if (c4 == "QR" || c4 == "RQ" || c4 == "LQ" || c4 == "QL" || c4 == "HR" || c4 == "TR" || c4 == "BR") {
786
+ return 2;
787
+ }
788
+ } else if (c3.at(0) == 'M') {
789
+ if (c4 == "QR" || c4 == "RQ" || c4 == "LQ" || c4 == "QL" || c4 == "HR" || c4 == "TR" || c4 == "BR") {
790
+ return 2;
791
+ }
792
+ }
793
+ }
794
+ return 2;
795
+ }
796
+
797
+
798
+ inline int ilaenv_crossover_point(const std::string& c2, const std::string& c3, const std::string& c4, bool sname, bool cname) {
799
+ if (c2 == "GE") {
800
+ if (c3 == "QRF" || c3 == "RQF" || c3 == "LQF" || c3 == "QLF") {
801
+ if (sname) {
802
+ return 128;
803
+ } else {
804
+ return 128;
805
+ }
806
+ } else if (c3 == "HRD") {
807
+ if (sname) {
808
+ return 128;
809
+ } else {
810
+ return 128;
811
+ }
812
+ } else if (c3 == "BRD") {
813
+ if (sname) {
814
+ return 128;
815
+ } else {
816
+ return 128;
817
+ }
818
+ }
819
+ } else if (c2 == "SY") {
820
+ if (sname && c3 == "TRD") {
821
+ return 32;
822
+ }
823
+ } else if (cname && c2 == "HE") {
824
+ if (c3 == "TRD") {
825
+ return 32;
826
+ }
827
+ } else if (sname && c2 == "OR") {
828
+ if (c3.at(0) == 'G') {
829
+ if (c4 == "QR" || c4 == "RQ" || c4 == "LQ" || c4 == "QL" || c4 == "HR" || c4 == "TR" || c4 == "BR") {
830
+ return 128;
831
+ }
832
+ }
833
+ } else if (cname && c2 == "UN") {
834
+ if (c3.at(0) == 'G') {
835
+ if (c4 == "QR" || c4 == "RQ" || c4 == "LQ" || c4 == "QL" || c4 == "HR" || c4 == "TR" || c4 == "BR") {
836
+ return 128;
837
+ }
838
+ }
839
+ }
840
+ return 0;
841
+ }
842
+
843
+
844
+ /* -- LAPACK auxiliary routine (version 3.2.1) -- */
845
+
846
+ /* -- April 2009 -- */
847
+
848
+ /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */
849
+ /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
850
+
851
+ /* .. Scalar Arguments .. */
852
+ /* .. */
853
+
854
+ /* Purpose */
855
+ /* ======= */
856
+
857
+ /* ILAENV is called from the LAPACK routines to choose problem-dependent */
858
+ /* parameters for the local environment. See ISPEC for a description of */
859
+ /* the parameters. */
860
+
861
+ /* ILAENV returns an INTEGER */
862
+ /* if ILAENV >= 0: ILAENV returns the value of the parameter specified by ISPEC */
863
+ /* if ILAENV < 0: if ILAENV = -k, the k-th argument had an illegal value. */
864
+
865
+ /* This version provides a set of parameters which should give good, */
866
+ /* but not optimal, performance on many of the currently available */
867
+ /* computers. Users are encouraged to modify this subroutine to set */
868
+ /* the tuning parameters for their particular machine using the option */
869
+ /* and problem size information in the arguments. */
870
+
871
+ /* This routine will not function correctly if it is converted to all */
872
+ /* lower case. Converting it to all upper case is allowed. */
873
+
874
+ /* Arguments */
875
+ /* ========= */
876
+
877
+ /* ISPEC (input) INTEGER */
878
+ /* Specifies the parameter to be returned as the value of */
879
+ /* ILAENV. */
880
+ /* = 1: the optimal blocksize; if this value is 1, an unblocked */
881
+ /* algorithm will give the best performance. */
882
+ /* = 2: the minimum block size for which the block routine */
883
+ /* should be used; if the usable block size is less than */
884
+ /* this value, an unblocked routine should be used. */
885
+ /* = 3: the crossover point (in a block routine, for N less */
886
+ /* than this value, an unblocked routine should be used) */
887
+ /* = 4: the number of shifts, used in the nonsymmetric */
888
+ /* eigenvalue routines (DEPRECATED) */
889
+ /* = 5: the minimum column dimension for blocking to be used; */
890
+ /* rectangular blocks must have dimension at least k by m, */
891
+ /* where k is given by ILAENV(2,...) and m by ILAENV(5,...) */
892
+ /* = 6: the crossover point for the SVD (when reducing an m by n */
893
+ /* matrix to bidiagonal form, if max(m,n)/min(m,n) exceeds */
894
+ /* this value, a QR factorization is used first to reduce */
895
+ /* the matrix to a triangular form.) */
896
+ /* = 7: the number of processors */
897
+ /* = 8: the crossover point for the multishift QR method */
898
+ /* for nonsymmetric eigenvalue problems (DEPRECATED) */
899
+ /* = 9: maximum size of the subproblems at the bottom of the */
900
+ /* computation tree in the divide-and-conquer algorithm */
901
+ /* (used by xGELSD and xGESDD) */
902
+ /* =10: ieee NaN arithmetic can be trusted not to trap */
903
+ /* =11: infinity arithmetic can be trusted not to trap */
904
+ /* 12 <= ISPEC <= 16: */
905
+ /* xHSEQR or one of its subroutines, */
906
+ /* see IPARMQ for detailed explanation */
907
+
908
+ /* NAME (input) CHARACTER*(*) */
909
+ /* The name of the calling subroutine, in either upper case or */
910
+ /* lower case. */
911
+
912
+ /* OPTS (input) CHARACTER*(*) */
913
+ /* The character options to the subroutine NAME, concatenated */
914
+ /* into a single character string. For example, UPLO = 'U', */
915
+ /* TRANS = 'T', and DIAG = 'N' for a triangular routine would */
916
+ /* be specified as OPTS = 'UTN'. */
917
+
918
+ /* N1 (input) INTEGER */
919
+ /* N2 (input) INTEGER */
920
+ /* N3 (input) INTEGER */
921
+ /* N4 (input) INTEGER */
922
+ /* Problem dimensions for the subroutine NAME; these may not all */
923
+ /* be required. */
924
+
925
+ /* Further Details */
926
+ /* =============== */
927
+
928
+ /* The following conventions have been used when calling ILAENV from the */
929
+ /* LAPACK routines: */
930
+ /* 1) OPTS is a concatenation of all of the character options to */
931
+ /* subroutine NAME, in the same order that they appear in the */
932
+ /* argument list for NAME, even if they are not used in determining */
933
+ /* the value of the parameter specified by ISPEC. */
934
+ /* 2) The problem dimensions N1, N2, N3, N4 are specified in the order */
935
+ /* that they appear in the argument list for NAME. N1 is used */
936
+ /* first, N2 second, and so on, and unused problem dimensions are */
937
+ /* passed a value of -1. */
938
+ /* 3) The parameter value returned by ILAENV is checked for validity in */
939
+ /* the calling subroutine. For example, ILAENV is used to retrieve */
940
+ /* the optimal blocksize for STRTRI as follows: */
941
+
942
+ /* NB = ILAENV( 1, 'STRTRI', UPLO // DIAG, N, -1, -1, -1 ) */
943
+ /* IF( NB.LE.1 ) NB = MAX( 1, N ) */
944
+
945
+ /* ===================================================================== */
946
+ inline int ilaenv(int ispec, const std::string& name, int n1, int n2, int n3, int n4) {
947
+
948
+ if (ispec < 1 || ispec > 3) {
949
+ switch (ispec) {
950
+ case 4: return 6; /* ISPEC = 4: number of shifts (used by xHSEQR) */
951
+ case 5: return 2; /* ISPEC = 5: minimum column dimension (not used) */
952
+ case 6: /* ISPEC = 6: crossover point for SVD (used by xGELSS and xGESVD) */
953
+ return (int) ((float)std::min(n1, n2) * 1.6f);
954
+ case 7: return 1; /* ISPEC = 7: number of processors (not used) */
955
+ case 8: return 50; /* ISPEC = 8: crossover point for multishift (used by xHSEQR) */
956
+ case 9: return 25; /* ISPEC = 9: maximum size of the subproblems at the bottom of the */
957
+ /* computation tree in the divide-and-conquer algorithm */
958
+ /* (used by xGELSD and xGESDD) */
959
+ case 10: /* ISPEC = 10: ieee NaN arithmetic can be trusted not to trap */
960
+ return ieeeck(1);
961
+
962
+ case 11: /* ISPEC = 11: infinity arithmetic can be trusted not to trap */
963
+ return ieeeck(0);
964
+
965
+ default:
966
+ if (ispec >= 12 && ispec <= 16) { /* 12 <= ISPEC <= 16: xHSEQR or one of its subroutines. */
967
+ return iparmq(ispec, n2, n3);
968
+ } else {
969
+ return -1; /* Invalid value for ISPEC */
970
+ }
971
+ }
972
+ }
973
+
974
+
975
+ /* Convert NAME to upper case if the first character is lower case. */
976
+
977
+ std::string subnam(name);
978
+ std::transform(subnam.begin(), subnam.end(), subnam.begin(), ::toupper);
979
+ std::string c1(subnam);
980
+
981
+ bool sname = c1.at(0) == 'S' || c1.at(0) == 'D',
982
+ cname = c1.at(0) == 'C' || c1.at(0) == 'Z';
983
+
984
+ if (! (cname || sname)) return 1;
985
+
986
+ std::string c2(subnam.substr(1, 2)),
987
+ c3(subnam.substr(3, 3)),
988
+ c4(c3.substr(1, 2));
989
+
990
+ if (ispec == 2) return ilaenv_min_block_size(c2, c3, c4, sname, cname);
991
+ if (ispec == 3) return ilaenv_crossover_point(c2, c3, c4, sname, cname);
992
+ return ilaenv_block_size(n2, n4, c2, c3, c4, sname, cname);
993
+
994
+ } /* ilaenv_ */
995
+
996
+
997
+
998
+ /* > \brief \b DGETF2 */
999
+
1000
+ /* =========== DOCUMENTATION =========== */
1001
+
1002
+ /* Online html documentation available at */
1003
+ /* http://www.netlib.org/lapack/explore-html/ */
1004
+
1005
+ /* > \htmlonly */
1006
+ /* > Download DGETF2 + dependencies */
1007
+ /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgetf2.
1008
+ f"> */
1009
+ /* > [TGZ]</a> */
1010
+ /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgetf2.
1011
+ f"> */
1012
+ /* > [ZIP]</a> */
1013
+ /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgetf2.
1014
+ f"> */
1015
+ /* > [TXT]</a> */
1016
+ /* > \endhtmlonly */
1017
+
1018
+ /* Definition: */
1019
+ /* =========== */
1020
+
1021
+ /* SUBROUTINE DGETF2( M, N, A, LDA, IPIV, INFO ) */
1022
+
1023
+ /* .. Scalar Arguments .. */
1024
+ /* INTEGER INFO, LDA, M, N */
1025
+ /* .. */
1026
+ /* .. Array Arguments .. */
1027
+ /* INTEGER IPIV( * ) */
1028
+ /* DOUBLE PRECISION A( LDA, * ) */
1029
+ /* .. */
1030
+
1031
+
1032
+ /* > \par Purpose: */
1033
+ /* ============= */
1034
+ /* > */
1035
+ /* > \verbatim */
1036
+ /* > */
1037
+ /* > DGETF2 computes an LU factorization of a general m-by-n matrix A */
1038
+ /* > using partial pivoting with row interchanges. */
1039
+ /* > */
1040
+ /* > The factorization has the form */
1041
+ /* > A = P * L * U */
1042
+ /* > where P is a permutation matrix, L is lower triangular with unit */
1043
+ /* > diagonal elements (lower trapezoidal if m > n), and U is upper */
1044
+ /* > triangular (upper trapezoidal if m < n). */
1045
+ /* > */
1046
+ /* > This is the right-looking Level 2 BLAS version of the algorithm. */
1047
+ /* > \endverbatim */
1048
+
1049
+ /* Arguments: */
1050
+ /* ========== */
1051
+
1052
+ /* > \param[in] M */
1053
+ /* > \verbatim */
1054
+ /* > M is INTEGER */
1055
+ /* > The number of rows of the matrix A. M >= 0. */
1056
+ /* > \endverbatim */
1057
+ /* > */
1058
+ /* > \param[in] N */
1059
+ /* > \verbatim */
1060
+ /* > N is INTEGER */
1061
+ /* > The number of columns of the matrix A. N >= 0. */
1062
+ /* > \endverbatim */
1063
+ /* > */
1064
+ /* > \param[in,out] A */
1065
+ /* > \verbatim */
1066
+ /* > A is DOUBLE PRECISION array, dimension (LDA,N) */
1067
+ /* > On entry, the m by n matrix to be factored. */
1068
+ /* > On exit, the factors L and U from the factorization */
1069
+ /* > A = P*L*U; the unit diagonal elements of L are not stored. */
1070
+ /* > \endverbatim */
1071
+ /* > */
1072
+ /* > \param[in] LDA */
1073
+ /* > \verbatim */
1074
+ /* > LDA is INTEGER */
1075
+ /* > The leading dimension of the array A. LDA >= max(1,M). */
1076
+ /* > \endverbatim */
1077
+ /* > */
1078
+ /* > \param[out] IPIV */
1079
+ /* > \verbatim */
1080
+ /* > IPIV is INTEGER array, dimension (min(M,N)) */
1081
+ /* > The pivot indices; for 1 <= i <= min(M,N), row i of the */
1082
+ /* > matrix was interchanged with row IPIV(i). */
1083
+ /* > \endverbatim */
1084
+ /* > */
1085
+ /* > \param[out] INFO */
1086
+ /* > \verbatim */
1087
+ /* > INFO is INTEGER */
1088
+ /* > = 0: successful exit */
1089
+ /* > < 0: if INFO = -k, the k-th argument had an illegal value */
1090
+ /* > > 0: if INFO = k, U(k,k) is exactly zero. The factorization */
1091
+ /* > has been completed, but the factor U is exactly */
1092
+ /* > singular, and division by zero will occur if it is used */
1093
+ /* > to solve a system of equations. */
1094
+ /* > \endverbatim */
1095
+
1096
+ /* Authors: */
1097
+ /* ======== */
1098
+
1099
+ /* > \author Univ. of Tennessee */
1100
+ /* > \author Univ. of California Berkeley */
1101
+ /* > \author Univ. of Colorado Denver */
1102
+ /* > \author NAG Ltd. */
1103
+
1104
+ /* > \date November 2011 */
1105
+
1106
+ /* > \ingroup doubleGEcomputational */
1107
+
1108
+ /* ===================================================================== */
1109
+
1110
+ template <typename DType>
1111
+ inline int getf2(const int m, const int n, DType* a, const int lda, int *ipiv) {
1112
+
1113
+ /* Function Body */
1114
+ if (m < 0) return -1; // error
1115
+ else if (n < 0) return -2; // error
1116
+ else if (lda < std::max(1,m)) return -4; // error
1117
+
1118
+
1119
+ if (m == 0 || n == 0) return 0; /* Quick return if possible */
1120
+
1121
+ for (size_t j = 0; j < std::min(m,n); ++j) { // changed
1122
+
1123
+ /* Find pivot and test for singularity. */
1124
+
1125
+ int jp = j - 1 + idamax<DType>(m-j+1, &a[j + j * lda], 1);
1126
+
1127
+ ipiv[j] = jp;
1128
+
1129
+
1130
+ if (a[jp + j*lda] != 0) {
1131
+
1132
+ /* Apply the interchange to columns 1:N. */
1133
+ // (Don't swap two columns that are the same.)
1134
+ if (jp != j) swap<DType>(n, &a[j], lda, &a[jp], lda);
1135
+
1136
+ /* Compute elements J+1:M of J-th column. */
1137
+
1138
+ if (j < m-1) {
1139
+ if (std::abs(a[j+j*lda]) >= std::numeric_limits<DType>::min()) {
1140
+ scal<DType>(m-j, 1.0 / a[j+j*lda], &a[j+1+j*lda], 1);
1141
+ } else {
1142
+ for (size_t i = 0; i < m-j; ++i) { // changed
1143
+ a[j+i+j*lda] /= a[j+j*lda];
1144
+ }
1145
+ }
1146
+ }
1147
+
1148
+ } else { // singular matrix
1149
+ return j; // U(j,j) is exactly zero, div by zero if answer is used to solve a system of equations.
1150
+ }
1151
+
1152
+ if (j < std::min(m,n)-1) /* Update trailing submatrix. */
1153
+ ger<DType>(m-j, n-j, -1.0, &a[j+1+j*lda], 1, &a[j+(j+1)*lda], lda, &a[j+1+(j+1)*lda], lda);
1154
+
1155
+ }
1156
+ return 0;
1157
+ } /* dgetf2_ */
1158
+
1159
+
1160
+
1161
+ } // end namespace lapack
1162
+
1163
+ /*
1164
+ * Function signature conversion for LAPACK's scal function.
1165
+ */
1166
+ template <typename DType>
1167
+ inline void clapack_scal(const int n, const void* da, void* dx, const int incx) {
1168
+ // FIXME: See if we can call the clapack version instead of our C++ version.
1169
+ nm::math::lapack::scal<DType>(n, *reinterpret_cast<const DType*>(da), reinterpret_cast<DType*>(dx), incx);
1170
+ }
1171
+
1172
+
1173
+ }}
1174
+
1175
+ #endif