nmatrix 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (91) hide show
  1. data/.gitignore +27 -0
  2. data/.rspec +2 -0
  3. data/Gemfile +3 -5
  4. data/Guardfile +6 -0
  5. data/History.txt +33 -0
  6. data/Manifest.txt +41 -38
  7. data/README.rdoc +88 -11
  8. data/Rakefile +35 -53
  9. data/ext/nmatrix/data/complex.h +372 -0
  10. data/ext/nmatrix/data/data.cpp +275 -0
  11. data/ext/nmatrix/data/data.h +707 -0
  12. data/ext/nmatrix/data/rational.h +421 -0
  13. data/ext/nmatrix/data/ruby_object.h +446 -0
  14. data/ext/nmatrix/extconf.rb +101 -51
  15. data/ext/nmatrix/new_extconf.rb +56 -0
  16. data/ext/nmatrix/nmatrix.cpp +1609 -0
  17. data/ext/nmatrix/nmatrix.h +265 -849
  18. data/ext/nmatrix/ruby_constants.cpp +134 -0
  19. data/ext/nmatrix/ruby_constants.h +103 -0
  20. data/ext/nmatrix/storage/common.cpp +70 -0
  21. data/ext/nmatrix/storage/common.h +170 -0
  22. data/ext/nmatrix/storage/dense.cpp +665 -0
  23. data/ext/nmatrix/storage/dense.h +116 -0
  24. data/ext/nmatrix/storage/list.cpp +1088 -0
  25. data/ext/nmatrix/storage/list.h +129 -0
  26. data/ext/nmatrix/storage/storage.cpp +658 -0
  27. data/ext/nmatrix/storage/storage.h +99 -0
  28. data/ext/nmatrix/storage/yale.cpp +1601 -0
  29. data/ext/nmatrix/storage/yale.h +208 -0
  30. data/ext/nmatrix/ttable_helper.rb +126 -0
  31. data/ext/nmatrix/{yale/smmp1_header.template.c → types.h} +36 -9
  32. data/ext/nmatrix/util/io.cpp +295 -0
  33. data/ext/nmatrix/util/io.h +117 -0
  34. data/ext/nmatrix/util/lapack.h +1175 -0
  35. data/ext/nmatrix/util/math.cpp +557 -0
  36. data/ext/nmatrix/util/math.h +1363 -0
  37. data/ext/nmatrix/util/sl_list.cpp +475 -0
  38. data/ext/nmatrix/util/sl_list.h +255 -0
  39. data/ext/nmatrix/util/util.h +78 -0
  40. data/lib/nmatrix/blas.rb +70 -0
  41. data/lib/nmatrix/io/mat5_reader.rb +567 -0
  42. data/lib/nmatrix/io/mat_reader.rb +162 -0
  43. data/lib/{string.rb → nmatrix/monkeys.rb} +49 -2
  44. data/lib/nmatrix/nmatrix.rb +199 -0
  45. data/lib/nmatrix/nvector.rb +103 -0
  46. data/lib/nmatrix/version.rb +27 -0
  47. data/lib/nmatrix.rb +22 -230
  48. data/nmatrix.gemspec +59 -0
  49. data/scripts/mac-brew-gcc.sh +47 -0
  50. data/spec/4x4_sparse.mat +0 -0
  51. data/spec/4x5_dense.mat +0 -0
  52. data/spec/blas_spec.rb +47 -0
  53. data/spec/elementwise_spec.rb +164 -0
  54. data/spec/io_spec.rb +60 -0
  55. data/spec/lapack_spec.rb +52 -0
  56. data/spec/math_spec.rb +96 -0
  57. data/spec/nmatrix_spec.rb +93 -89
  58. data/spec/nmatrix_yale_spec.rb +52 -36
  59. data/spec/nvector_spec.rb +1 -1
  60. data/spec/slice_spec.rb +257 -0
  61. data/spec/spec_helper.rb +51 -0
  62. data/spec/utm5940.mtx +83844 -0
  63. metadata +113 -71
  64. data/.autotest +0 -23
  65. data/.gemtest +0 -0
  66. data/ext/nmatrix/cblas.c +0 -150
  67. data/ext/nmatrix/dense/blas_header.template.c +0 -52
  68. data/ext/nmatrix/dense/elementwise.template.c +0 -107
  69. data/ext/nmatrix/dense/gemm.template.c +0 -159
  70. data/ext/nmatrix/dense/gemv.template.c +0 -130
  71. data/ext/nmatrix/dense/rationalmath.template.c +0 -68
  72. data/ext/nmatrix/dense.c +0 -307
  73. data/ext/nmatrix/depend +0 -18
  74. data/ext/nmatrix/generator/syntax_tree.rb +0 -481
  75. data/ext/nmatrix/generator.rb +0 -594
  76. data/ext/nmatrix/list.c +0 -774
  77. data/ext/nmatrix/nmatrix.c +0 -1977
  78. data/ext/nmatrix/rational.c +0 -98
  79. data/ext/nmatrix/yale/complexmath.template.c +0 -71
  80. data/ext/nmatrix/yale/elementwise.template.c +0 -46
  81. data/ext/nmatrix/yale/elementwise_op.template.c +0 -73
  82. data/ext/nmatrix/yale/numbmm.template.c +0 -94
  83. data/ext/nmatrix/yale/smmp1.template.c +0 -21
  84. data/ext/nmatrix/yale/smmp2.template.c +0 -43
  85. data/ext/nmatrix/yale/smmp2_header.template.c +0 -46
  86. data/ext/nmatrix/yale/sort_columns.template.c +0 -56
  87. data/ext/nmatrix/yale/symbmm.template.c +0 -54
  88. data/ext/nmatrix/yale/transp.template.c +0 -68
  89. data/ext/nmatrix/yale.c +0 -726
  90. data/lib/array.rb +0 -67
  91. data/spec/syntax_tree_spec.rb +0 -46
@@ -0,0 +1,1175 @@
1
+ /////////////////////////////////////////////////////////////////////
2
+ // = NMatrix
3
+ //
4
+ // A linear algebra library for scientific computation in Ruby.
5
+ // NMatrix is part of SciRuby.
6
+ //
7
+ // NMatrix was originally inspired by and derived from NArray, by
8
+ // Masahiro Tanaka: http://narray.rubyforge.org
9
+ //
10
+ // == Copyright Information
11
+ //
12
+ // SciRuby is Copyright (c) 2010 - 2012, Ruby Science Foundation
13
+ // NMatrix is Copyright (c) 2012, Ruby Science Foundation
14
+ //
15
+ // Please see LICENSE.txt for additional copyright notices.
16
+ //
17
+ // == Contributing
18
+ //
19
+ // By contributing source code to SciRuby, you agree to be bound by
20
+ // our Contributor Agreement:
21
+ //
22
+ // * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement
23
+ //
24
+ // == lapack.h
25
+ //
26
+ // Templated versions of LAPACK functions, in C++.
27
+
28
+ #ifndef LAPACK_H
29
+ #define LAPACK_H
30
+
31
+ #include <cmath> // std::round
32
+
33
+ #include "math.h"
34
+
35
+ namespace nm { namespace math { namespace lapack {
36
+
37
+
38
+
39
+ /* -- LAPACK auxiliary routine (version 3.2) -- */
40
+ /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */
41
+ /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
42
+ /* November 2006 */
43
+
44
+ /* .. Scalar Arguments .. */
45
+
46
+ /* Purpose */
47
+ /* ======= */
48
+
49
+ /* This program sets problem and machine dependent parameters */
50
+ /* useful for xHSEQR and its subroutines. It is called whenever */
51
+ /* ILAENV is called with 12 <= ISPEC <= 16 */
52
+
53
+ /* Arguments */
54
+ /* ========= */
55
+
56
+ /* ISPEC (input) int scalar */
57
+ /* ISPEC specifies which tunable parameter IPARMQ should */
58
+ /* return. */
59
+
60
+ /* ISPEC=12: (INMIN) Matrices of order nmin or less */
61
+ /* are sent directly to xLAHQR, the implicit */
62
+ /* double shift QR algorithm. NMIN must be */
63
+ /* at least 11. */
64
+
65
+ /* ISPEC=13: (INWIN) Size of the deflation window. */
66
+ /* This is best set greater than or equal to */
67
+ /* the number of simultaneous shifts NS. */
68
+ /* Larger matrices benefit from larger deflation */
69
+ /* windows. */
70
+
71
+ /* ISPEC=14: (INIBL) Determines when to stop nibbling and */
72
+ /* invest in an (expensive) multi-shift QR sweep. */
73
+ /* If the aggressive early deflation subroutine */
74
+ /* finds LD converged eigenvalues from an order */
75
+ /* NW deflation window and LD.GT.(NW*NIBBLE)/100, */
76
+ /* then the next QR sweep is skipped and early */
77
+ /* deflation is applied immediately to the */
78
+ /* remaining active diagonal block. Setting */
79
+ /* IPARMQ(ISPEC=14) = 0 causes TTQRE to skip a */
80
+ /* multi-shift QR sweep whenever early deflation */
81
+ /* finds a converged eigenvalue. Setting */
82
+ /* IPARMQ(ISPEC=14) greater than or equal to 100 */
83
+ /* prevents TTQRE from skipping a multi-shift */
84
+ /* QR sweep. */
85
+
86
+ /* ISPEC=15: (NSHFTS) The number of simultaneous shifts in */
87
+ /* a multi-shift QR iteration. */
88
+
89
+ /* ISPEC=16: (IACC22) IPARMQ is set to 0, 1 or 2 with the */
90
+ /* following meanings. */
91
+ /* 0: During the multi-shift QR sweep, */
92
+ /* xLAQR5 does not accumulate reflections and */
93
+ /* does not use matrix-matrix multiply to */
94
+ /* update the far-from-diagonal matrix */
95
+ /* entries. */
96
+ /* 1: During the multi-shift QR sweep, */
97
+ /* xLAQR5 and/or xLAQRaccumulates reflections and uses */
98
+ /* matrix-matrix multiply to update the */
99
+ /* far-from-diagonal matrix entries. */
100
+ /* 2: During the multi-shift QR sweep. */
101
+ /* xLAQR5 accumulates reflections and takes */
102
+ /* advantage of 2-by-2 block structure during */
103
+ /* matrix-matrix multiplies. */
104
+ /* (If xTRMM is slower than xGEMM, then */
105
+ /* IPARMQ(ISPEC=16)=1 may be more efficient than */
106
+ /* IPARMQ(ISPEC=16)=2 despite the greater level of */
107
+ /* arithmetic work implied by the latter choice.) */
108
+
109
+ /* NAME (input) character string */
110
+ /* Name of the calling subroutine */
111
+
112
+ /* OPTS (input) character string */
113
+ /* This is a concatenation of the string arguments to */
114
+ /* TTQRE. */
115
+
116
+ /* N (input) int scalar */
117
+ /* N is the order of the Hessenberg matrix H. */
118
+
119
+ /* ILO (input) INTEGER */
120
+ /* IHI (input) INTEGER */
121
+ /* It is assumed that H is already upper triangular */
122
+ /* in rows and columns 1:ILO-1 and IHI+1:N. */
123
+
124
+ /* LWORK (input) int scalar */
125
+ /* The amount of workspace available. */
126
+
127
+ /* Further Details */
128
+ /* =============== */
129
+
130
+ /* Little is known about how best to choose these parameters. */
131
+ /* It is possible to use different values of the parameters */
132
+ /* for each of CHSEQR, DHSEQR, SHSEQR and ZHSEQR. */
133
+
134
+ /* It is probably best to choose different parameters for */
135
+ /* different matrices and different parameters at different */
136
+ /* times during the iteration, but this has not been */
137
+ /* implemented --- yet. */
138
+
139
+
140
+ /* The best choices of most of the parameters depend */
141
+ /* in an ill-understood way on the relative execution */
142
+ /* rate of xLAQR3 and xLAQR5 and on the nature of each */
143
+ /* particular eigenvalue problem. Experiment may be the */
144
+ /* only practical way to determine which choices are most */
145
+ /* effective. */
146
+
147
+ /* Following is a list of default values supplied by IPARMQ. */
148
+ /* These defaults may be adjusted in order to attain better */
149
+ /* performance in any particular computational environment. */
150
+
151
+ /* IPARMQ(ISPEC=12) The xLAHQR vs xLAQR0 crossover point. */
152
+ /* Default: 75. (Must be at least 11.) */
153
+
154
+ /* IPARMQ(ISPEC=13) Recommended deflation window size. */
155
+ /* This depends on ILO, IHI and NS, the */
156
+ /* number of simultaneous shifts returned */
157
+ /* by IPARMQ(ISPEC=15). The default for */
158
+ /* (IHI-ILO+1).LE.500 is NS. The default */
159
+ /* for (IHI-ILO+1).GT.500 is 3*NS/2. */
160
+
161
+ /* IPARMQ(ISPEC=14) Nibble crossover point. Default: 14. */
162
+
163
+ /* IPARMQ(ISPEC=15) Number of simultaneous shifts, NS. */
164
+ /* a multi-shift QR iteration. */
165
+
166
+ /* If IHI-ILO+1 is ... */
167
+
168
+ /* greater than ...but less ... the */
169
+ /* or equal to ... than default is */
170
+
171
+ /* 0 30 NS = 2+ */
172
+ /* 30 60 NS = 4+ */
173
+ /* 60 150 NS = 10 */
174
+ /* 150 590 NS = ** */
175
+ /* 590 3000 NS = 64 */
176
+ /* 3000 6000 NS = 128 */
177
+ /* 6000 infinity NS = 256 */
178
+
179
+ /* (+) By default matrices of this order are */
180
+ /* passed to the implicit double shift routine */
181
+ /* xLAHQR. See IPARMQ(ISPEC=12) above. These */
182
+ /* values of NS are used only in case of a rare */
183
+ /* xLAHQR failure. */
184
+
185
+ /* (**) The asterisks (**) indicate an ad-hoc */
186
+ /* function increasing from 10 to 64. */
187
+
188
+ /* IPARMQ(ISPEC=16) Select structured matrix multiply. */
189
+ /* (See ISPEC=16 above for details.) */
190
+ /* Default: 3. */
191
+
192
+ /* ================================================================ */
193
+ inline int iparmq(int ispec, int ilo, int ihi) {
194
+
195
+ const int INMIN = 12, INWIN = 13, INIBL = 14, ISHFTS = 15, IACC22 = 16;
196
+ const int NMIN = 75, K22MIN = 14, KACMIN = 14, NIBBLE = 14, KNWSWP = 500;
197
+
198
+ int ns = 2, nh = ihi - ilo + 1;
199
+
200
+ if (ispec == ISHFTS || ispec == INWIN|| ispec == IACC22) {
201
+
202
+ /* ==== Set the number of simultaneous shifts ==== */
203
+ if (nh >= 30) ns = 4;
204
+ if (nh >= 60) ns = 10;
205
+ if (nh >= 150) ns = std::max(10, (int)(nh / std::round(std::log((float) (ihi - ilo + 1)) / log(2.f)))); /* Computing MAX */
206
+ if (nh >= 590) ns = 64;
207
+ if (nh >= 3000) ns = 128;
208
+ if (nh >= 6000) ns = 256;
209
+ ns = std::max(2,ns - ns % 2); /* Computing MAX */
210
+ }
211
+
212
+ if (ispec == INMIN) {
213
+ /* ===== Matrices of order smaller than NMIN get sent */
214
+ /* . to xLAHQR, the classic double shift algorithm. */
215
+ /* . This must be at least 11. ==== */
216
+ return NMIN;
217
+
218
+ } else if (ispec == INIBL) {
219
+
220
+ /* ==== INIBL: skip a multi-shift qr iteration and */
221
+ /* . whenever aggressive early deflation finds */
222
+ /* . at least (NIBBLE*(window size)/100) deflations. ==== */
223
+
224
+ return NIBBLE;
225
+
226
+ } else if (ispec == ISHFTS) {
227
+
228
+ /* ==== NSHFTS: The number of simultaneous shifts ===== */
229
+ return ns;
230
+
231
+ } else if (ispec == INWIN) {
232
+
233
+ /* ==== NW: deflation window size. ==== */
234
+
235
+ if (nh <= KNWSWP) return ns;
236
+ else return ns * 3 / 2;
237
+
238
+ } else if (ispec == 16) {
239
+
240
+ /* ==== IACC22: Whether to accumulate reflections */
241
+ /* . before updating the far-from-diagonal elements */
242
+ /* . and whether to use 2-by-2 block structure while */
243
+ /* . doing it. A small amount of work could be saved */
244
+ /* . by making this choice dependent also upon the */
245
+ /* . NH=IHI-ILO+1. */
246
+
247
+ if (ns >= KACMIN) return 1;
248
+ if (ns >= K22MIN) return 2;
249
+
250
+ }
251
+
252
+ return -1;
253
+ } /* iparmq_ */
254
+
255
+
256
+
257
+
258
+ /* Purpose */
259
+ /* ======= */
260
+
261
+ /* DGER performs the rank 1 operation */
262
+
263
+ /* A := alpha*x*y**T + A, */
264
+
265
+ /* where alpha is a scalar, x is an m element vector, y is an n element */
266
+ /* vector and A is an m by n matrix. */
267
+
268
+ /* Arguments */
269
+ /* ========== */
270
+
271
+ /* M - INTEGER. */
272
+ /* On entry, M specifies the number of rows of the matrix A. */
273
+ /* M must be at least zero. */
274
+ /* Unchanged on exit. */
275
+
276
+ /* N - INTEGER. */
277
+ /* On entry, N specifies the number of columns of the matrix A. */
278
+ /* N must be at least zero. */
279
+ /* Unchanged on exit. */
280
+
281
+ /* ALPHA - DOUBLE PRECISION. */
282
+ /* On entry, ALPHA specifies the scalar alpha. */
283
+ /* Unchanged on exit. */
284
+
285
+ /* X - DOUBLE PRECISION array of dimension at least */
286
+ /* ( 1 + ( m - 1 )*abs( INCX ) ). */
287
+ /* Before entry, the incremented array X must contain the m */
288
+ /* element vector x. */
289
+ /* Unchanged on exit. */
290
+
291
+ /* INCX - INTEGER. */
292
+ /* On entry, INCX specifies the increment for the elements of */
293
+ /* X. INCX must not be zero. */
294
+ /* Unchanged on exit. */
295
+
296
+ /* Y - DOUBLE PRECISION array of dimension at least */
297
+ /* ( 1 + ( n - 1 )*abs( INCY ) ). */
298
+ /* Before entry, the incremented array Y must contain the n */
299
+ /* element vector y. */
300
+ /* Unchanged on exit. */
301
+
302
+ /* INCY - INTEGER. */
303
+ /* On entry, INCY specifies the increment for the elements of */
304
+ /* Y. INCY must not be zero. */
305
+ /* Unchanged on exit. */
306
+
307
+ /* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). */
308
+ /* Before entry, the leading m by n part of the array A must */
309
+ /* contain the matrix of coefficients. On exit, A is */
310
+ /* overwritten by the updated matrix. */
311
+
312
+ /* LDA - INTEGER. */
313
+ /* On entry, LDA specifies the first dimension of A as declared */
314
+ /* in the calling (sub) program. LDA must be at least */
315
+ /* max( 1, m ). */
316
+ /* Unchanged on exit. */
317
+
318
+ /* Further Details */
319
+ /* =============== */
320
+
321
+ /* Level 2 Blas routine. */
322
+
323
+ /* -- Written on 22-October-1986. */
324
+ /* Jack Dongarra, Argonne National Lab. */
325
+ /* Jeremy Du Croz, Nag Central Office. */
326
+ /* Sven Hammarling, Nag Central Office. */
327
+ /* Richard Hanson, Sandia National Labs. */
328
+
329
+ /* ===================================================================== */
330
+
331
+ template <typename DType>
332
+ inline int ger(int m, int n, DType alpha, DType* x, int incx, DType* y, int incy, DType* a, int lda) {
333
+
334
+ // FIXME: Call BLAS ger if available
335
+
336
+ if (m < 0) {
337
+ return 1;
338
+ } else if (n < 0) {
339
+ return 2;
340
+ } else if (incx == 0) {
341
+ return 5;
342
+ } else if (incy == 0) {
343
+ return 7;
344
+ } else if (lda < std::max(1,m)) {
345
+ return 9;
346
+ }
347
+
348
+ if (m == 0 || n == 0 || alpha == 0) return 0; /* Quick return if possible. */
349
+
350
+ /* Start the operations. In this version the elements of A are */
351
+ /* accessed sequentially with one pass through A. */
352
+
353
+ // FIXME: These have been unrolled in a way that the compiler can handle. Collapse into a single case, or optimize
354
+ // FIXME: in a more modern way.
355
+
356
+ int jy = incy > 0 ? 0 : -(n-1) * incy;
357
+
358
+ if (incx == 1) {
359
+
360
+ for (size_t j = 0; j < n; ++j, jy += incy) {
361
+ if (y[jy] != 0) {
362
+ DType temp = alpha * y[jy];
363
+ for (size_t i = 0; i < m; ++i) {
364
+ a[i + j * lda] += x[i] * temp;
365
+ }
366
+ }
367
+ }
368
+
369
+ } else {
370
+
371
+ int kx = incx > 0 ? 0 : -(m-1) * incx;
372
+
373
+ for (size_t j = 0; j < n; ++j, jy += incy) {
374
+ if (y[jy] != 0) {
375
+ DType temp = alpha * y[jy];
376
+
377
+ for (size_t i = 0, ix = kx; i < m; ++i, ix += incx) {
378
+ a[i + j * lda] += x[ix] * temp;
379
+ }
380
+ }
381
+ }
382
+
383
+ }
384
+
385
+ return 0;
386
+
387
+ /* End of DGER . */
388
+
389
+ } /* dger_ */
390
+
391
+
392
+ /* Purpose */
393
+ /* ======= */
394
+
395
+ /* DSCAL scales a vector by a constant. */
396
+ /* uses unrolled loops for increment equal to one. */
397
+
398
+ /* Further Details */
399
+ /* =============== */
400
+
401
+ /* jack dongarra, linpack, 3/11/78. */
402
+ /* modified 3/93 to return if incx .le. 0. */
403
+ /* modified 12/3/93, array(1) declarations changed to array(*) */
404
+
405
+ /* ===================================================================== */
406
+
407
+ template <typename DType>
408
+ inline void scal(const int n, const DType da, DType* dx, const int incx) {
409
+
410
+ // This used to have unrolled loops, like dswap. They were in the way.
411
+
412
+ if (n <= 0 || incx <= 0) return;
413
+
414
+ for (int i = 0; incx < 0 ? i > n*incx : i < n*incx; i += incx) {
415
+ dx[i] = da * dx[i];
416
+ }
417
+ } /* scal */
418
+
419
+
420
+ /* Purpose */
421
+ /* ======= */
422
+
423
+ /* interchanges two vectors. */
424
+ /* uses unrolled loops for increments equal one. */
425
+
426
+ /* Further Details */
427
+ /* =============== */
428
+
429
+ /* jack dongarra, linpack, 3/11/78. */
430
+ /* modified 12/3/93, array(1) declarations changed to array(*) */
431
+
432
+ /* ===================================================================== */
433
+ // Formerly dswap
434
+ template <typename DType>
435
+ inline void swap(int n, DType *dx, int incx, DType *dy, int incy) {
436
+
437
+ /* Function Body */
438
+ if (n <= 0) return;
439
+
440
+ /*
441
+ * The NETLIB version of dswap has loops manually unrolled, per commented code below.
442
+ * This doesn't make sense with modern compilers, which know much more about arch-
443
+ * itectures than we do. Combine that with our use of templates, and it's much more
444
+ * efficient to let the compiler do the unrolling in most cases.
445
+ */
446
+ /*
447
+ if (incx == 1 && incy == 1) { // if both increments are 1
448
+
449
+
450
+ m = n % 3;
451
+ if (m) {
452
+ for (size_t i = 0; i < m; ++i) { // If number is not divisible by three, swap just one or two singly.
453
+ dtemp = dx[i];
454
+ dx[i] = dy[i];
455
+ dy[i] = dtemp;
456
+ }
457
+ if (n < 3) return;
458
+ }
459
+
460
+ for (i = m; i < n; i += 3) { // Why does it swap three at a time? -- John 8/27/12
461
+ DType dtemp = dx[i];
462
+ dx[i] = dy[i];
463
+ dy[i] = dtemp;
464
+
465
+ dtemp = dx[i + 1];
466
+ dx[i + 1] = dy[i + 1];
467
+ dy[i + 1] = dtemp;
468
+
469
+ dtemp = dx[i + 2];
470
+ dx[i + 2] = dy[i + 2];
471
+ dy[i + 2] = dtemp;
472
+ }
473
+
474
+ } else { // when any increment is not 1
475
+ */
476
+
477
+ // For negative increments, start at the end of the array.
478
+ int ix = incx < 0 ? (-n+1)*incx : 0,
479
+ iy = incy < 0 ? (-n+1)*incy : 0;
480
+
481
+ if (incx < 0) ix = (-n + 1) * incx;
482
+ if (incy < 0) iy = (-n + 1) * incy;
483
+
484
+ for (size_t i = 0; i < n; ++i, ix += incx, iy += incy) {
485
+ DType dtemp = dx[ix];
486
+ dx[ix] = dy[iy];
487
+ dy[iy] = dtemp;
488
+ }
489
+ /*} */
490
+ return;
491
+ } /* dswap */
492
+
493
+
494
+
495
+
496
+ /* Purpose */
497
+ /* ======= */
498
+
499
+ /* IDAMAX finds the index of element having max. absolute value. */
500
+
501
+ /* Further Details */
502
+ /* =============== */
503
+
504
+ /* jack dongarra, linpack, 3/11/78. */
505
+ /* modified 3/93 to return if incx .le. 0. */
506
+ /* modified 12/3/93, array(1) declarations changed to array(*) */
507
+
508
+ /* ===================================================================== */
509
+
510
+ template <typename DType>
511
+ inline int idamax(size_t n, DType *dx, int incx) {
512
+
513
+ /* Function Body */
514
+ if (n < 1 || incx <= 0) return -1;
515
+ if (n == 1) return 0;
516
+
517
+ DType dmax;
518
+ size_t imax = 0;
519
+
520
+ if (incx == 1) { // if incrementing by 1
521
+
522
+ dmax = abs(dx[0]);
523
+
524
+ for (size_t i = 1; i < n; ++i) {
525
+ if (std::abs(dx[i]) > dmax) {
526
+ imax = i;
527
+ dmax = std::abs(dx[i]);
528
+ }
529
+ }
530
+
531
+ } else { // if incrementing by more than 1
532
+
533
+ dmax = std::abs(dx[0]);
534
+
535
+ for (size_t i = 1, ix = incx; i < n; ++i, ix += incx) {
536
+ if (std::abs(dx[ix]) > dmax) {
537
+ imax = i;
538
+ dmax = std::abs(dx[ix]);
539
+ }
540
+ }
541
+ }
542
+ return imax;
543
+ } /* idamax_ */
544
+
545
+
546
+
547
+
548
+
549
+
550
+ /* -- LAPACK auxiliary routine (version 3.3.1) -- */
551
+ /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */
552
+ /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
553
+ /* -- April 2011 -- */
554
+
555
+ /* .. Scalar Arguments .. */
556
+ /* .. */
557
+
558
+ /* Purpose */
559
+ /* ======= */
560
+
561
+ /* IEEECK is called from the ILAENV to verify that Infinity and */
562
+ /* possibly NaN arithmetic is safe (i.e. will not trap). */
563
+ // FIXME: Can we use std::numeric_limits::traps for this?
564
+
565
+ /* Arguments */
566
+ /* ========= */
567
+
568
+ /* ISPEC (input) INTEGER */
569
+ /* Specifies whether to test just for inifinity arithmetic */
570
+ /* or whether to test for infinity and NaN arithmetic. */
571
+ /* = 0: Verify infinity arithmetic only. */
572
+ /* = 1: Verify infinity and NaN arithmetic. */
573
+
574
+ /* ZERO (input) REAL */
575
+ /* Must contain the value 0.0 */
576
+ /* This is passed to prevent the compiler from optimizing */
577
+ /* away this code. */
578
+
579
+ /* ONE (input) REAL */
580
+ /* Must contain the value 1.0 */
581
+ /* This is passed to prevent the compiler from optimizing */
582
+ /* away this code. */
583
+
584
+ /* RETURN VALUE: INTEGER */
585
+ /* = 0: Arithmetic failed to produce the correct answers */
586
+ /* = 1: Arithmetic produced the correct answers */
587
+
588
+ /* ===================================================================== */
589
+
590
+ /*
591
+ * Note from John: This seems totally unnecessary in modern C++.
592
+ * FIXME: Remove this after testing that on modern systems this always returns 1.
593
+ */
594
+
595
+ inline int ieeeck(bool ispec) {
596
+
597
+ float posinf = 1.0 / 0.0;
598
+ if (posinf <= 1.0) return 0;
599
+
600
+ float neginf = -1.0 / 0.0;
601
+ if (neginf >= 0.0) return 0;
602
+
603
+ float negzro = 1.0 / (neginf + 1.0);
604
+ if (negzro != 0.0) return 0;
605
+
606
+ neginf = 1.0 / negzro;
607
+ if (neginf >= 0.0) return 0;
608
+
609
+ float newzro = negzro + 0.0;
610
+ if (newzro != 0.0) return 0;
611
+
612
+ posinf = 1.0 / newzro;
613
+ if (posinf <= 1.0) return 0;
614
+
615
+ neginf *= posinf;
616
+ if (neginf >= 0.0) return 0;
617
+
618
+ posinf *= posinf;
619
+ if (posinf <= 1.0) return 0;
620
+
621
+
622
+ /* Return if we were only asked to check infinity arithmetic */
623
+
624
+ if (!ispec) return 1;
625
+
626
+ float nan1 = posinf + neginf;
627
+ float nan2 = posinf / neginf;
628
+ float nan3 = posinf / posinf;
629
+ float nan4 = posinf * 0.0;
630
+ float nan5 = neginf * negzro;
631
+ float nan6 = nan5 * 0.0;
632
+
633
+ if (nan1 == nan1) return 0;
634
+ if (nan2 == nan2) return 0;
635
+ if (nan3 == nan3) return 0;
636
+ if (nan4 == nan4) return 0;
637
+ if (nan5 == nan5) return 0;
638
+ if (nan6 == nan6) return 0;
639
+
640
+ return 1;
641
+ } /* ieeeck_ */
642
+
643
+
644
+
645
+
646
+ inline int ilaenv_block_size(int n2, int n4, const std::string& c2, const std::string& c3, const std::string& c4, bool sname, bool cname) {
647
+ if (c2 == "GE") { //(s_cmp(c2, "GE", (size_t)2, (size_t)2) == 0) {
648
+ if (c3 == "TRF") { //if (s_cmp(c3, "TRF", (size_t)3, (size_t)3) == 0) {
649
+ if (sname) return 64;
650
+ else return 64;
651
+ } else if (c3 == "QRF" || c3 == "RQF" || c3 == "LQF" || c3 == "QLF") { //(s_cmp(c3, "QRF", (size_t)3, (size_t)3) == 0 || s_cmp(c3, "RQF", (size_t)3, (size_t)3) == 0 || s_cmp(c3, "LQF", (size_t) 3, (size_t)3) == 0 || s_cmp(c3, "QLF", (size_t)3, (size_t)3) == 0) {
652
+ if (sname) return 32;
653
+ else return 32;
654
+ } else if (c3 == "HRD") {
655
+ if (sname) return 32;
656
+ else return 32;
657
+ } else if (c3 == "BRD") {
658
+ if (sname) return 32;
659
+ else return 32;
660
+ } else if (c3 == "TRI") {
661
+ if (sname) return 64;
662
+ else return 64;
663
+ }
664
+ } else if (c2 == "PO") {
665
+ if (c3 == "TRF") {
666
+ if (sname) return 64;
667
+ else return 64;
668
+ }
669
+ } else if (c2 == "SY") {
670
+ if (c3 == "TRF") {
671
+ if (sname) return 64;
672
+ else return 64;
673
+ } else if (sname && c3 == "TRD") {
674
+ return 32;
675
+ } else if (sname && c3 == "GST") {
676
+ return 64;
677
+ }
678
+ } else if (cname && c2 == "HE") {
679
+ if (c3 == "TRF") return 64;
680
+ else if (c3 == "TRD") return 32;
681
+ else if (c3 == "GST") return 64;
682
+ } else if (sname && c2 == "OR") {
683
+ if (c3.at(0) == 'G') {
684
+ if (c4 == "QR" || c4 == "RQ" || c4 == "LQ" || c4 == "QL" || c4 == "HR" || c4 == "TR" || c4 == "BR") return 32;
685
+ } else if (c3.at(0) == 'M') {
686
+ if (c4 == "QR" || c4 == "RQ" || c4 == "LQ" || c4 == "QL" || c4 == "HR" || c4 == "TR" || c4 == "BR") return 32;
687
+ }
688
+ } else if (cname && c2 == "UN") {
689
+ if (c3.at(0) == 'G') {
690
+ if (c4 == "QR" || c4 == "RQ" || c4 == "LQ" || c4 == "QL" || c4 == "HR" || c4 == "TR" || c4 == "BR") return 32;
691
+ } else if (c3.at(0) == 'M') {
692
+ if (c4 == "QR" || c4 == "RQ" || c4 == "LQ" || c4 == "QL" || c4 == "HR" || c4 == "TR" || c4 == "BR") return 32;
693
+ }
694
+ } else if (c2 == "GB") {
695
+ if (c3 == "TRF") {
696
+ if (sname) {
697
+ if (n4 <= 64) return 1;
698
+ else return 32;
699
+ } else {
700
+ if (n4 <= 64) return 1;
701
+ else return 32;
702
+ }
703
+ }
704
+ } else if (c2 == "PB") {
705
+ if (c3 == "TRF") {
706
+ if (sname) {
707
+ if (n2 <= 64) return 1;
708
+ else return 32;
709
+ } else {
710
+ if (n2 <= 64) return 1;
711
+ else return 32;
712
+ }
713
+ }
714
+ } else if (c2 == "TR") {
715
+ if (c3 == "TRI") {
716
+ if (sname) return 64;
717
+ else return 64;
718
+ }
719
+ } else if (c2 == "LA") {
720
+ if (c3 == "UUM") {
721
+ if (sname) return 64;
722
+ else return 64;
723
+ }
724
+ } else if (sname && c2 == "ST") {
725
+ if (c3 == "EBZ") return 1;
726
+ }
727
+ return 1;
728
+ }
729
+
730
+
731
+ inline int ilaenv_min_block_size(const std::string& c2, const std::string& c3, const std::string& c4, bool sname, bool cname) {
732
+
733
+ if (c2 == "GE") {
734
+ if (c3 == "QRF" || c3 == "RQF" || c3 == "LQF" || c3 == "QLF") {
735
+ if (sname) {
736
+ return 2;
737
+ } else {
738
+ return 2;
739
+ }
740
+ } else if (c3 == "HRD") {
741
+ if (sname) {
742
+ return 2;
743
+ } else {
744
+ return 2;
745
+ }
746
+ } else if (c3 == "BRD") {
747
+ if (sname) {
748
+ return 2;
749
+ } else {
750
+ return 2;
751
+ }
752
+ } else if (c3 == "TRI") {
753
+ if (sname) {
754
+ return 2;
755
+ } else {
756
+ return 2;
757
+ }
758
+ }
759
+ } else if (c2 == "SY") {
760
+ if (c3 == "TRF") {
761
+ if (sname) {
762
+ return 8;
763
+ } else {
764
+ return 8;
765
+ }
766
+ } else if (sname && c3 == "TRD") {
767
+ return 2;
768
+ }
769
+ } else if (cname && c2 == "HE") {
770
+ if (c3 == "TRD") {
771
+ return 2;
772
+ }
773
+ } else if (sname && c2 == "OR") {
774
+ if (c3.at(0) == 'G') {
775
+ if (c4 == "QR" || c4 == "RQ" || c4 == "LQ" || c4 == "QL" || c4 == "HR" || c4 == "TR" || c4 == "BR") {
776
+ return 2;
777
+ }
778
+ } else if (c3.at(0) == 'M') {
779
+ if (c4 == "QR" || c4 == "RQ" || c4 == "LQ" || c4 == "QL" || c4 == "HR" || c4 == "TR" || c4 == "BR") {
780
+ return 2;
781
+ }
782
+ }
783
+ } else if (cname && c2 == "UN") {
784
+ if (c3.at(0) == 'G') {
785
+ if (c4 == "QR" || c4 == "RQ" || c4 == "LQ" || c4 == "QL" || c4 == "HR" || c4 == "TR" || c4 == "BR") {
786
+ return 2;
787
+ }
788
+ } else if (c3.at(0) == 'M') {
789
+ if (c4 == "QR" || c4 == "RQ" || c4 == "LQ" || c4 == "QL" || c4 == "HR" || c4 == "TR" || c4 == "BR") {
790
+ return 2;
791
+ }
792
+ }
793
+ }
794
+ return 2;
795
+ }
796
+
797
+
798
+ inline int ilaenv_crossover_point(const std::string& c2, const std::string& c3, const std::string& c4, bool sname, bool cname) {
799
+ if (c2 == "GE") {
800
+ if (c3 == "QRF" || c3 == "RQF" || c3 == "LQF" || c3 == "QLF") {
801
+ if (sname) {
802
+ return 128;
803
+ } else {
804
+ return 128;
805
+ }
806
+ } else if (c3 == "HRD") {
807
+ if (sname) {
808
+ return 128;
809
+ } else {
810
+ return 128;
811
+ }
812
+ } else if (c3 == "BRD") {
813
+ if (sname) {
814
+ return 128;
815
+ } else {
816
+ return 128;
817
+ }
818
+ }
819
+ } else if (c2 == "SY") {
820
+ if (sname && c3 == "TRD") {
821
+ return 32;
822
+ }
823
+ } else if (cname && c2 == "HE") {
824
+ if (c3 == "TRD") {
825
+ return 32;
826
+ }
827
+ } else if (sname && c2 == "OR") {
828
+ if (c3.at(0) == 'G') {
829
+ if (c4 == "QR" || c4 == "RQ" || c4 == "LQ" || c4 == "QL" || c4 == "HR" || c4 == "TR" || c4 == "BR") {
830
+ return 128;
831
+ }
832
+ }
833
+ } else if (cname && c2 == "UN") {
834
+ if (c3.at(0) == 'G') {
835
+ if (c4 == "QR" || c4 == "RQ" || c4 == "LQ" || c4 == "QL" || c4 == "HR" || c4 == "TR" || c4 == "BR") {
836
+ return 128;
837
+ }
838
+ }
839
+ }
840
+ return 0;
841
+ }
842
+
843
+
844
+ /* -- LAPACK auxiliary routine (version 3.2.1) -- */
845
+
846
+ /* -- April 2009 -- */
847
+
848
+ /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */
849
+ /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
850
+
851
+ /* .. Scalar Arguments .. */
852
+ /* .. */
853
+
854
+ /* Purpose */
855
+ /* ======= */
856
+
857
+ /* ILAENV is called from the LAPACK routines to choose problem-dependent */
858
+ /* parameters for the local environment. See ISPEC for a description of */
859
+ /* the parameters. */
860
+
861
+ /* ILAENV returns an INTEGER */
862
+ /* if ILAENV >= 0: ILAENV returns the value of the parameter specified by ISPEC */
863
+ /* if ILAENV < 0: if ILAENV = -k, the k-th argument had an illegal value. */
864
+
865
+ /* This version provides a set of parameters which should give good, */
866
+ /* but not optimal, performance on many of the currently available */
867
+ /* computers. Users are encouraged to modify this subroutine to set */
868
+ /* the tuning parameters for their particular machine using the option */
869
+ /* and problem size information in the arguments. */
870
+
871
+ /* This routine will not function correctly if it is converted to all */
872
+ /* lower case. Converting it to all upper case is allowed. */
873
+
874
+ /* Arguments */
875
+ /* ========= */
876
+
877
+ /* ISPEC (input) INTEGER */
878
+ /* Specifies the parameter to be returned as the value of */
879
+ /* ILAENV. */
880
+ /* = 1: the optimal blocksize; if this value is 1, an unblocked */
881
+ /* algorithm will give the best performance. */
882
+ /* = 2: the minimum block size for which the block routine */
883
+ /* should be used; if the usable block size is less than */
884
+ /* this value, an unblocked routine should be used. */
885
+ /* = 3: the crossover point (in a block routine, for N less */
886
+ /* than this value, an unblocked routine should be used) */
887
+ /* = 4: the number of shifts, used in the nonsymmetric */
888
+ /* eigenvalue routines (DEPRECATED) */
889
+ /* = 5: the minimum column dimension for blocking to be used; */
890
+ /* rectangular blocks must have dimension at least k by m, */
891
+ /* where k is given by ILAENV(2,...) and m by ILAENV(5,...) */
892
+ /* = 6: the crossover point for the SVD (when reducing an m by n */
893
+ /* matrix to bidiagonal form, if max(m,n)/min(m,n) exceeds */
894
+ /* this value, a QR factorization is used first to reduce */
895
+ /* the matrix to a triangular form.) */
896
+ /* = 7: the number of processors */
897
+ /* = 8: the crossover point for the multishift QR method */
898
+ /* for nonsymmetric eigenvalue problems (DEPRECATED) */
899
+ /* = 9: maximum size of the subproblems at the bottom of the */
900
+ /* computation tree in the divide-and-conquer algorithm */
901
+ /* (used by xGELSD and xGESDD) */
902
+ /* =10: ieee NaN arithmetic can be trusted not to trap */
903
+ /* =11: infinity arithmetic can be trusted not to trap */
904
+ /* 12 <= ISPEC <= 16: */
905
+ /* xHSEQR or one of its subroutines, */
906
+ /* see IPARMQ for detailed explanation */
907
+
908
+ /* NAME (input) CHARACTER*(*) */
909
+ /* The name of the calling subroutine, in either upper case or */
910
+ /* lower case. */
911
+
912
+ /* OPTS (input) CHARACTER*(*) */
913
+ /* The character options to the subroutine NAME, concatenated */
914
+ /* into a single character string. For example, UPLO = 'U', */
915
+ /* TRANS = 'T', and DIAG = 'N' for a triangular routine would */
916
+ /* be specified as OPTS = 'UTN'. */
917
+
918
+ /* N1 (input) INTEGER */
919
+ /* N2 (input) INTEGER */
920
+ /* N3 (input) INTEGER */
921
+ /* N4 (input) INTEGER */
922
+ /* Problem dimensions for the subroutine NAME; these may not all */
923
+ /* be required. */
924
+
925
+ /* Further Details */
926
+ /* =============== */
927
+
928
+ /* The following conventions have been used when calling ILAENV from the */
929
+ /* LAPACK routines: */
930
+ /* 1) OPTS is a concatenation of all of the character options to */
931
+ /* subroutine NAME, in the same order that they appear in the */
932
+ /* argument list for NAME, even if they are not used in determining */
933
+ /* the value of the parameter specified by ISPEC. */
934
+ /* 2) The problem dimensions N1, N2, N3, N4 are specified in the order */
935
+ /* that they appear in the argument list for NAME. N1 is used */
936
+ /* first, N2 second, and so on, and unused problem dimensions are */
937
+ /* passed a value of -1. */
938
+ /* 3) The parameter value returned by ILAENV is checked for validity in */
939
+ /* the calling subroutine. For example, ILAENV is used to retrieve */
940
+ /* the optimal blocksize for STRTRI as follows: */
941
+
942
+ /* NB = ILAENV( 1, 'STRTRI', UPLO // DIAG, N, -1, -1, -1 ) */
943
+ /* IF( NB.LE.1 ) NB = MAX( 1, N ) */
944
+
945
+ /* ===================================================================== */
946
+ inline int ilaenv(int ispec, const std::string& name, int n1, int n2, int n3, int n4) {
947
+
948
+ if (ispec < 1 || ispec > 3) {
949
+ switch (ispec) {
950
+ case 4: return 6; /* ISPEC = 4: number of shifts (used by xHSEQR) */
951
+ case 5: return 2; /* ISPEC = 5: minimum column dimension (not used) */
952
+ case 6: /* ISPEC = 6: crossover point for SVD (used by xGELSS and xGESVD) */
953
+ return (int) ((float)std::min(n1, n2) * 1.6f);
954
+ case 7: return 1; /* ISPEC = 7: number of processors (not used) */
955
+ case 8: return 50; /* ISPEC = 8: crossover point for multishift (used by xHSEQR) */
956
+ case 9: return 25; /* ISPEC = 9: maximum size of the subproblems at the bottom of the */
957
+ /* computation tree in the divide-and-conquer algorithm */
958
+ /* (used by xGELSD and xGESDD) */
959
+ case 10: /* ISPEC = 10: ieee NaN arithmetic can be trusted not to trap */
960
+ return ieeeck(1);
961
+
962
+ case 11: /* ISPEC = 11: infinity arithmetic can be trusted not to trap */
963
+ return ieeeck(0);
964
+
965
+ default:
966
+ if (ispec >= 12 && ispec <= 16) { /* 12 <= ISPEC <= 16: xHSEQR or one of its subroutines. */
967
+ return iparmq(ispec, n2, n3);
968
+ } else {
969
+ return -1; /* Invalid value for ISPEC */
970
+ }
971
+ }
972
+ }
973
+
974
+
975
+ /* Convert NAME to upper case if the first character is lower case. */
976
+
977
+ std::string subnam(name);
978
+ std::transform(subnam.begin(), subnam.end(), subnam.begin(), ::toupper);
979
+ std::string c1(subnam);
980
+
981
+ bool sname = c1.at(0) == 'S' || c1.at(0) == 'D',
982
+ cname = c1.at(0) == 'C' || c1.at(0) == 'Z';
983
+
984
+ if (! (cname || sname)) return 1;
985
+
986
+ std::string c2(subnam.substr(1, 2)),
987
+ c3(subnam.substr(3, 3)),
988
+ c4(c3.substr(1, 2));
989
+
990
+ if (ispec == 2) return ilaenv_min_block_size(c2, c3, c4, sname, cname);
991
+ if (ispec == 3) return ilaenv_crossover_point(c2, c3, c4, sname, cname);
992
+ return ilaenv_block_size(n2, n4, c2, c3, c4, sname, cname);
993
+
994
+ } /* ilaenv_ */
995
+
996
+
997
+
998
+ /* > \brief \b DGETF2 */
999
+
1000
+ /* =========== DOCUMENTATION =========== */
1001
+
1002
+ /* Online html documentation available at */
1003
+ /* http://www.netlib.org/lapack/explore-html/ */
1004
+
1005
+ /* > \htmlonly */
1006
+ /* > Download DGETF2 + dependencies */
1007
+ /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgetf2.
1008
+ f"> */
1009
+ /* > [TGZ]</a> */
1010
+ /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgetf2.
1011
+ f"> */
1012
+ /* > [ZIP]</a> */
1013
+ /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgetf2.
1014
+ f"> */
1015
+ /* > [TXT]</a> */
1016
+ /* > \endhtmlonly */
1017
+
1018
+ /* Definition: */
1019
+ /* =========== */
1020
+
1021
+ /* SUBROUTINE DGETF2( M, N, A, LDA, IPIV, INFO ) */
1022
+
1023
+ /* .. Scalar Arguments .. */
1024
+ /* INTEGER INFO, LDA, M, N */
1025
+ /* .. */
1026
+ /* .. Array Arguments .. */
1027
+ /* INTEGER IPIV( * ) */
1028
+ /* DOUBLE PRECISION A( LDA, * ) */
1029
+ /* .. */
1030
+
1031
+
1032
+ /* > \par Purpose: */
1033
+ /* ============= */
1034
+ /* > */
1035
+ /* > \verbatim */
1036
+ /* > */
1037
+ /* > DGETF2 computes an LU factorization of a general m-by-n matrix A */
1038
+ /* > using partial pivoting with row interchanges. */
1039
+ /* > */
1040
+ /* > The factorization has the form */
1041
+ /* > A = P * L * U */
1042
+ /* > where P is a permutation matrix, L is lower triangular with unit */
1043
+ /* > diagonal elements (lower trapezoidal if m > n), and U is upper */
1044
+ /* > triangular (upper trapezoidal if m < n). */
1045
+ /* > */
1046
+ /* > This is the right-looking Level 2 BLAS version of the algorithm. */
1047
+ /* > \endverbatim */
1048
+
1049
+ /* Arguments: */
1050
+ /* ========== */
1051
+
1052
+ /* > \param[in] M */
1053
+ /* > \verbatim */
1054
+ /* > M is INTEGER */
1055
+ /* > The number of rows of the matrix A. M >= 0. */
1056
+ /* > \endverbatim */
1057
+ /* > */
1058
+ /* > \param[in] N */
1059
+ /* > \verbatim */
1060
+ /* > N is INTEGER */
1061
+ /* > The number of columns of the matrix A. N >= 0. */
1062
+ /* > \endverbatim */
1063
+ /* > */
1064
+ /* > \param[in,out] A */
1065
+ /* > \verbatim */
1066
+ /* > A is DOUBLE PRECISION array, dimension (LDA,N) */
1067
+ /* > On entry, the m by n matrix to be factored. */
1068
+ /* > On exit, the factors L and U from the factorization */
1069
+ /* > A = P*L*U; the unit diagonal elements of L are not stored. */
1070
+ /* > \endverbatim */
1071
+ /* > */
1072
+ /* > \param[in] LDA */
1073
+ /* > \verbatim */
1074
+ /* > LDA is INTEGER */
1075
+ /* > The leading dimension of the array A. LDA >= max(1,M). */
1076
+ /* > \endverbatim */
1077
+ /* > */
1078
+ /* > \param[out] IPIV */
1079
+ /* > \verbatim */
1080
+ /* > IPIV is INTEGER array, dimension (min(M,N)) */
1081
+ /* > The pivot indices; for 1 <= i <= min(M,N), row i of the */
1082
+ /* > matrix was interchanged with row IPIV(i). */
1083
+ /* > \endverbatim */
1084
+ /* > */
1085
+ /* > \param[out] INFO */
1086
+ /* > \verbatim */
1087
+ /* > INFO is INTEGER */
1088
+ /* > = 0: successful exit */
1089
+ /* > < 0: if INFO = -k, the k-th argument had an illegal value */
1090
+ /* > > 0: if INFO = k, U(k,k) is exactly zero. The factorization */
1091
+ /* > has been completed, but the factor U is exactly */
1092
+ /* > singular, and division by zero will occur if it is used */
1093
+ /* > to solve a system of equations. */
1094
+ /* > \endverbatim */
1095
+
1096
+ /* Authors: */
1097
+ /* ======== */
1098
+
1099
+ /* > \author Univ. of Tennessee */
1100
+ /* > \author Univ. of California Berkeley */
1101
+ /* > \author Univ. of Colorado Denver */
1102
+ /* > \author NAG Ltd. */
1103
+
1104
+ /* > \date November 2011 */
1105
+
1106
+ /* > \ingroup doubleGEcomputational */
1107
+
1108
+ /* ===================================================================== */
1109
+
1110
+ template <typename DType>
1111
+ inline int getf2(const int m, const int n, DType* a, const int lda, int *ipiv) {
1112
+
1113
+ /* Function Body */
1114
+ if (m < 0) return -1; // error
1115
+ else if (n < 0) return -2; // error
1116
+ else if (lda < std::max(1,m)) return -4; // error
1117
+
1118
+
1119
+ if (m == 0 || n == 0) return 0; /* Quick return if possible */
1120
+
1121
+ for (size_t j = 0; j < std::min(m,n); ++j) { // changed
1122
+
1123
+ /* Find pivot and test for singularity. */
1124
+
1125
+ int jp = j - 1 + idamax<DType>(m-j+1, &a[j + j * lda], 1);
1126
+
1127
+ ipiv[j] = jp;
1128
+
1129
+
1130
+ if (a[jp + j*lda] != 0) {
1131
+
1132
+ /* Apply the interchange to columns 1:N. */
1133
+ // (Don't swap two columns that are the same.)
1134
+ if (jp != j) swap<DType>(n, &a[j], lda, &a[jp], lda);
1135
+
1136
+ /* Compute elements J+1:M of J-th column. */
1137
+
1138
+ if (j < m-1) {
1139
+ if (std::abs(a[j+j*lda]) >= std::numeric_limits<DType>::min()) {
1140
+ scal<DType>(m-j, 1.0 / a[j+j*lda], &a[j+1+j*lda], 1);
1141
+ } else {
1142
+ for (size_t i = 0; i < m-j; ++i) { // changed
1143
+ a[j+i+j*lda] /= a[j+j*lda];
1144
+ }
1145
+ }
1146
+ }
1147
+
1148
+ } else { // singular matrix
1149
+ return j; // U(j,j) is exactly zero, div by zero if answer is used to solve a system of equations.
1150
+ }
1151
+
1152
+ if (j < std::min(m,n)-1) /* Update trailing submatrix. */
1153
+ ger<DType>(m-j, n-j, -1.0, &a[j+1+j*lda], 1, &a[j+(j+1)*lda], lda, &a[j+1+(j+1)*lda], lda);
1154
+
1155
+ }
1156
+ return 0;
1157
+ } /* dgetf2_ */
1158
+
1159
+
1160
+
1161
+ } // end namespace lapack
1162
+
1163
+ /*
1164
+ * Function signature conversion for LAPACK's scal function.
1165
+ */
1166
+ template <typename DType>
1167
+ inline void clapack_scal(const int n, const void* da, void* dx, const int incx) {
1168
+ // FIXME: See if we can call the clapack version instead of our C++ version.
1169
+ nm::math::lapack::scal<DType>(n, *reinterpret_cast<const DType*>(da), reinterpret_cast<DType*>(dx), incx);
1170
+ }
1171
+
1172
+
1173
+ }}
1174
+
1175
+ #endif