nmatrix 0.0.6 → 0.0.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/Gemfile +5 -0
- data/History.txt +97 -0
- data/Manifest.txt +34 -7
- data/README.rdoc +13 -13
- data/Rakefile +36 -26
- data/ext/nmatrix/data/data.cpp +15 -2
- data/ext/nmatrix/data/data.h +4 -0
- data/ext/nmatrix/data/ruby_object.h +5 -14
- data/ext/nmatrix/extconf.rb +3 -2
- data/ext/nmatrix/{util/math.cpp → math.cpp} +296 -6
- data/ext/nmatrix/math/asum.h +143 -0
- data/ext/nmatrix/math/geev.h +82 -0
- data/ext/nmatrix/math/gemm.h +267 -0
- data/ext/nmatrix/math/gemv.h +208 -0
- data/ext/nmatrix/math/ger.h +96 -0
- data/ext/nmatrix/math/gesdd.h +80 -0
- data/ext/nmatrix/math/gesvd.h +78 -0
- data/ext/nmatrix/math/getf2.h +86 -0
- data/ext/nmatrix/math/getrf.h +240 -0
- data/ext/nmatrix/math/getri.h +107 -0
- data/ext/nmatrix/math/getrs.h +125 -0
- data/ext/nmatrix/math/idamax.h +86 -0
- data/ext/nmatrix/{util → math}/lapack.h +60 -356
- data/ext/nmatrix/math/laswp.h +165 -0
- data/ext/nmatrix/math/long_dtype.h +52 -0
- data/ext/nmatrix/math/math.h +1154 -0
- data/ext/nmatrix/math/nrm2.h +181 -0
- data/ext/nmatrix/math/potrs.h +125 -0
- data/ext/nmatrix/math/rot.h +141 -0
- data/ext/nmatrix/math/rotg.h +115 -0
- data/ext/nmatrix/math/scal.h +73 -0
- data/ext/nmatrix/math/swap.h +73 -0
- data/ext/nmatrix/math/trsm.h +383 -0
- data/ext/nmatrix/nmatrix.cpp +176 -152
- data/ext/nmatrix/nmatrix.h +1 -2
- data/ext/nmatrix/ruby_constants.cpp +9 -4
- data/ext/nmatrix/ruby_constants.h +1 -0
- data/ext/nmatrix/storage/dense.cpp +57 -41
- data/ext/nmatrix/storage/list.cpp +52 -50
- data/ext/nmatrix/storage/storage.cpp +59 -43
- data/ext/nmatrix/storage/yale.cpp +352 -333
- data/ext/nmatrix/storage/yale.h +4 -0
- data/lib/nmatrix.rb +2 -2
- data/lib/nmatrix/blas.rb +4 -4
- data/lib/nmatrix/enumerate.rb +241 -0
- data/lib/nmatrix/lapack.rb +54 -1
- data/lib/nmatrix/math.rb +462 -0
- data/lib/nmatrix/nmatrix.rb +210 -486
- data/lib/nmatrix/nvector.rb +0 -62
- data/lib/nmatrix/rspec.rb +75 -0
- data/lib/nmatrix/shortcuts.rb +136 -108
- data/lib/nmatrix/version.rb +1 -1
- data/spec/blas_spec.rb +20 -12
- data/spec/elementwise_spec.rb +22 -13
- data/spec/io_spec.rb +1 -0
- data/spec/lapack_spec.rb +197 -0
- data/spec/nmatrix_spec.rb +39 -38
- data/spec/nvector_spec.rb +3 -9
- data/spec/rspec_monkeys.rb +29 -0
- data/spec/rspec_spec.rb +34 -0
- data/spec/shortcuts_spec.rb +14 -16
- data/spec/slice_spec.rb +242 -186
- data/spec/spec_helper.rb +19 -0
- metadata +33 -5
- data/ext/nmatrix/util/math.h +0 -2612
@@ -0,0 +1,107 @@
|
|
1
|
+
/////////////////////////////////////////////////////////////////////
|
2
|
+
// = NMatrix
|
3
|
+
//
|
4
|
+
// A linear algebra library for scientific computation in Ruby.
|
5
|
+
// NMatrix is part of SciRuby.
|
6
|
+
//
|
7
|
+
// NMatrix was originally inspired by and derived from NArray, by
|
8
|
+
// Masahiro Tanaka: http://narray.rubyforge.org
|
9
|
+
//
|
10
|
+
// == Copyright Information
|
11
|
+
//
|
12
|
+
// SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
|
13
|
+
// NMatrix is Copyright (c) 2013, Ruby Science Foundation
|
14
|
+
//
|
15
|
+
// Please see LICENSE.txt for additional copyright notices.
|
16
|
+
//
|
17
|
+
// == Contributing
|
18
|
+
//
|
19
|
+
// By contributing source code to SciRuby, you agree to be bound by
|
20
|
+
// our Contributor Agreement:
|
21
|
+
//
|
22
|
+
// * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement
|
23
|
+
//
|
24
|
+
// == getri.h
|
25
|
+
//
|
26
|
+
// getri function in native C++.
|
27
|
+
//
|
28
|
+
|
29
|
+
/*
|
30
|
+
* Automatically Tuned Linear Algebra Software v3.8.4
|
31
|
+
* (C) Copyright 1999 R. Clint Whaley
|
32
|
+
*
|
33
|
+
* Redistribution and use in source and binary forms, with or without
|
34
|
+
* modification, are permitted provided that the following conditions
|
35
|
+
* are met:
|
36
|
+
* 1. Redistributions of source code must retain the above copyright
|
37
|
+
* notice, this list of conditions and the following disclaimer.
|
38
|
+
* 2. Redistributions in binary form must reproduce the above copyright
|
39
|
+
* notice, this list of conditions, and the following disclaimer in the
|
40
|
+
* documentation and/or other materials provided with the distribution.
|
41
|
+
* 3. The name of the ATLAS group or the names of its contributers may
|
42
|
+
* not be used to endorse or promote products derived from this
|
43
|
+
* software without specific written permission.
|
44
|
+
*
|
45
|
+
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
46
|
+
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
47
|
+
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
48
|
+
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ATLAS GROUP OR ITS CONTRIBUTORS
|
49
|
+
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
50
|
+
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
51
|
+
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
52
|
+
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
53
|
+
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
54
|
+
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
55
|
+
* POSSIBILITY OF SUCH DAMAGE.
|
56
|
+
*
|
57
|
+
*/
|
58
|
+
|
59
|
+
#ifndef GETRI_H
|
60
|
+
#define GETRI_H
|
61
|
+
|
62
|
+
namespace nm { namespace math {
|
63
|
+
|
64
|
+
template <typename DType>
|
65
|
+
inline int getri(const enum CBLAS_ORDER order, const int n, DType* a, const int lda, const int* ipiv) {
|
66
|
+
rb_raise(rb_eNotImpError, "getri not yet implemented for non-BLAS dtypes");
|
67
|
+
return 0;
|
68
|
+
}
|
69
|
+
|
70
|
+
#ifdef HAVE_CLAPACK_H
|
71
|
+
template <>
|
72
|
+
inline int getri(const enum CBLAS_ORDER order, const int n, float* a, const int lda, const int* ipiv) {
|
73
|
+
return clapack_sgetri(order, n, a, lda, ipiv);
|
74
|
+
}
|
75
|
+
|
76
|
+
template <>
|
77
|
+
inline int getri(const enum CBLAS_ORDER order, const int n, double* a, const int lda, const int* ipiv) {
|
78
|
+
return clapack_dgetri(order, n, a, lda, ipiv);
|
79
|
+
}
|
80
|
+
|
81
|
+
template <>
|
82
|
+
inline int getri(const enum CBLAS_ORDER order, const int n, Complex64* a, const int lda, const int* ipiv) {
|
83
|
+
return clapack_cgetri(order, n, reinterpret_cast<void*>(a), lda, ipiv);
|
84
|
+
}
|
85
|
+
|
86
|
+
template <>
|
87
|
+
inline int getri(const enum CBLAS_ORDER order, const int n, Complex128* a, const int lda, const int* ipiv) {
|
88
|
+
return clapack_zgetri(order, n, reinterpret_cast<void*>(a), lda, ipiv);
|
89
|
+
}
|
90
|
+
#endif
|
91
|
+
|
92
|
+
/*
|
93
|
+
* Function signature conversion for calling LAPACK's getri functions as directly as possible.
|
94
|
+
*
|
95
|
+
* For documentation: http://www.netlib.org/lapack/double/dgetri.f
|
96
|
+
*
|
97
|
+
* This function should normally go in math.cpp, but we need it to be available to nmatrix.cpp.
|
98
|
+
*/
|
99
|
+
template <typename DType>
|
100
|
+
inline int clapack_getri(const enum CBLAS_ORDER order, const int n, void* a, const int lda, const int* ipiv) {
|
101
|
+
return getri<DType>(order, n, reinterpret_cast<DType*>(a), lda, ipiv);
|
102
|
+
}
|
103
|
+
|
104
|
+
|
105
|
+
} } // end nm::math
|
106
|
+
|
107
|
+
#endif // GETRI_H
|
@@ -0,0 +1,125 @@
|
|
1
|
+
/////////////////////////////////////////////////////////////////////
|
2
|
+
// = NMatrix
|
3
|
+
//
|
4
|
+
// A linear algebra library for scientific computation in Ruby.
|
5
|
+
// NMatrix is part of SciRuby.
|
6
|
+
//
|
7
|
+
// NMatrix was originally inspired by and derived from NArray, by
|
8
|
+
// Masahiro Tanaka: http://narray.rubyforge.org
|
9
|
+
//
|
10
|
+
// == Copyright Information
|
11
|
+
//
|
12
|
+
// SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
|
13
|
+
// NMatrix is Copyright (c) 2013, Ruby Science Foundation
|
14
|
+
//
|
15
|
+
// Please see LICENSE.txt for additional copyright notices.
|
16
|
+
//
|
17
|
+
// == Contributing
|
18
|
+
//
|
19
|
+
// By contributing source code to SciRuby, you agree to be bound by
|
20
|
+
// our Contributor Agreement:
|
21
|
+
//
|
22
|
+
// * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement
|
23
|
+
//
|
24
|
+
// == getrs.h
|
25
|
+
//
|
26
|
+
// getrs function in native C++.
|
27
|
+
//
|
28
|
+
|
29
|
+
/*
|
30
|
+
* Automatically Tuned Linear Algebra Software v3.8.4
|
31
|
+
* (C) Copyright 1999 R. Clint Whaley
|
32
|
+
*
|
33
|
+
* Redistribution and use in source and binary forms, with or without
|
34
|
+
* modification, are permitted provided that the following conditions
|
35
|
+
* are met:
|
36
|
+
* 1. Redistributions of source code must retain the above copyright
|
37
|
+
* notice, this list of conditions and the following disclaimer.
|
38
|
+
* 2. Redistributions in binary form must reproduce the above copyright
|
39
|
+
* notice, this list of conditions, and the following disclaimer in the
|
40
|
+
* documentation and/or other materials provided with the distribution.
|
41
|
+
* 3. The name of the ATLAS group or the names of its contributers may
|
42
|
+
* not be used to endorse or promote products derived from this
|
43
|
+
* software without specific written permission.
|
44
|
+
*
|
45
|
+
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
46
|
+
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
47
|
+
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
48
|
+
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ATLAS GROUP OR ITS CONTRIBUTORS
|
49
|
+
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
50
|
+
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
51
|
+
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
52
|
+
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
53
|
+
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
54
|
+
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
55
|
+
* POSSIBILITY OF SUCH DAMAGE.
|
56
|
+
*
|
57
|
+
*/
|
58
|
+
|
59
|
+
#ifndef GETRS_H
|
60
|
+
#define GETRS_H
|
61
|
+
|
62
|
+
extern "C" {
|
63
|
+
#include <cblas.h>
|
64
|
+
}
|
65
|
+
|
66
|
+
namespace nm { namespace math {
|
67
|
+
|
68
|
+
|
69
|
+
/*
|
70
|
+
* Solves a system of linear equations A*X = B with a general NxN matrix A using the LU factorization computed by GETRF.
|
71
|
+
*
|
72
|
+
* From ATLAS 3.8.0.
|
73
|
+
*/
|
74
|
+
template <typename DType>
|
75
|
+
int getrs(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE Trans, const int N, const int NRHS, const DType* A,
|
76
|
+
const int lda, const int* ipiv, DType* B, const int ldb)
|
77
|
+
{
|
78
|
+
// enum CBLAS_DIAG Lunit, Uunit; // These aren't used. Not sure why they're declared in ATLAS' src.
|
79
|
+
|
80
|
+
if (!N || !NRHS) return 0;
|
81
|
+
|
82
|
+
const DType ONE = 1;
|
83
|
+
|
84
|
+
if (Order == CblasColMajor) {
|
85
|
+
if (Trans == CblasNoTrans) {
|
86
|
+
nm::math::laswp<DType>(NRHS, B, ldb, 0, N, ipiv, 1);
|
87
|
+
nm::math::trsm<DType>(Order, CblasLeft, CblasLower, CblasNoTrans, CblasUnit, N, NRHS, ONE, A, lda, B, ldb);
|
88
|
+
nm::math::trsm<DType>(Order, CblasLeft, CblasUpper, CblasNoTrans, CblasNonUnit, N, NRHS, ONE, A, lda, B, ldb);
|
89
|
+
} else {
|
90
|
+
nm::math::trsm<DType>(Order, CblasLeft, CblasUpper, Trans, CblasNonUnit, N, NRHS, ONE, A, lda, B, ldb);
|
91
|
+
nm::math::trsm<DType>(Order, CblasLeft, CblasLower, Trans, CblasUnit, N, NRHS, ONE, A, lda, B, ldb);
|
92
|
+
nm::math::laswp<DType>(NRHS, B, ldb, 0, N, ipiv, -1);
|
93
|
+
}
|
94
|
+
} else {
|
95
|
+
if (Trans == CblasNoTrans) {
|
96
|
+
nm::math::trsm<DType>(Order, CblasRight, CblasLower, CblasTrans, CblasNonUnit, NRHS, N, ONE, A, lda, B, ldb);
|
97
|
+
nm::math::trsm<DType>(Order, CblasRight, CblasUpper, CblasTrans, CblasUnit, NRHS, N, ONE, A, lda, B, ldb);
|
98
|
+
nm::math::laswp<DType>(NRHS, B, ldb, 0, N, ipiv, -1);
|
99
|
+
} else {
|
100
|
+
nm::math::laswp<DType>(NRHS, B, ldb, 0, N, ipiv, 1);
|
101
|
+
nm::math::trsm<DType>(Order, CblasRight, CblasUpper, CblasNoTrans, CblasUnit, NRHS, N, ONE, A, lda, B, ldb);
|
102
|
+
nm::math::trsm<DType>(Order, CblasRight, CblasLower, CblasNoTrans, CblasNonUnit, NRHS, N, ONE, A, lda, B, ldb);
|
103
|
+
}
|
104
|
+
}
|
105
|
+
return 0;
|
106
|
+
}
|
107
|
+
|
108
|
+
|
109
|
+
/*
|
110
|
+
* Function signature conversion for calling LAPACK's getrs functions as directly as possible.
|
111
|
+
*
|
112
|
+
* For documentation: http://www.netlib.org/lapack/double/dgetrs.f
|
113
|
+
*
|
114
|
+
* This function should normally go in math.cpp, but we need it to be available to nmatrix.cpp.
|
115
|
+
*/
|
116
|
+
template <typename DType>
|
117
|
+
inline int clapack_getrs(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE trans, const int n, const int nrhs,
|
118
|
+
const void* a, const int lda, const int* ipiv, void* b, const int ldb) {
|
119
|
+
return getrs<DType>(order, trans, n, nrhs, reinterpret_cast<const DType*>(a), lda, ipiv, reinterpret_cast<DType*>(b), ldb);
|
120
|
+
}
|
121
|
+
|
122
|
+
|
123
|
+
} } // end nm::math
|
124
|
+
|
125
|
+
#endif // GETRS_H
|
@@ -0,0 +1,86 @@
|
|
1
|
+
/////////////////////////////////////////////////////////////////////
|
2
|
+
// = NMatrix
|
3
|
+
//
|
4
|
+
// A linear algebra library for scientific computation in Ruby.
|
5
|
+
// NMatrix is part of SciRuby.
|
6
|
+
//
|
7
|
+
// NMatrix was originally inspired by and derived from NArray, by
|
8
|
+
// Masahiro Tanaka: http://narray.rubyforge.org
|
9
|
+
//
|
10
|
+
// == Copyright Information
|
11
|
+
//
|
12
|
+
// SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
|
13
|
+
// NMatrix is Copyright (c) 2013, Ruby Science Foundation
|
14
|
+
//
|
15
|
+
// Please see LICENSE.txt for additional copyright notices.
|
16
|
+
//
|
17
|
+
// == Contributing
|
18
|
+
//
|
19
|
+
// By contributing source code to SciRuby, you agree to be bound by
|
20
|
+
// our Contributor Agreement:
|
21
|
+
//
|
22
|
+
// * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement
|
23
|
+
//
|
24
|
+
// == idamax.h
|
25
|
+
//
|
26
|
+
// LAPACK idamax function in native C.
|
27
|
+
//
|
28
|
+
|
29
|
+
#ifndef IDAMAX_H
|
30
|
+
#define IDAMAX_H
|
31
|
+
|
32
|
+
namespace nm { namespace math {
|
33
|
+
|
34
|
+
/* Purpose */
|
35
|
+
/* ======= */
|
36
|
+
|
37
|
+
/* IDAMAX finds the index of element having max. absolute value. */
|
38
|
+
|
39
|
+
/* Further Details */
|
40
|
+
/* =============== */
|
41
|
+
|
42
|
+
/* jack dongarra, linpack, 3/11/78. */
|
43
|
+
/* modified 3/93 to return if incx .le. 0. */
|
44
|
+
/* modified 12/3/93, array(1) declarations changed to array(*) */
|
45
|
+
|
46
|
+
/* ===================================================================== */
|
47
|
+
|
48
|
+
template <typename DType>
|
49
|
+
inline int idamax(size_t n, DType *dx, int incx) {
|
50
|
+
|
51
|
+
/* Function Body */
|
52
|
+
if (n < 1 || incx <= 0) return -1;
|
53
|
+
if (n == 1) return 0;
|
54
|
+
|
55
|
+
DType dmax;
|
56
|
+
size_t imax = 0;
|
57
|
+
|
58
|
+
if (incx == 1) { // if incrementing by 1
|
59
|
+
|
60
|
+
dmax = abs(dx[0]);
|
61
|
+
|
62
|
+
for (size_t i = 1; i < n; ++i) {
|
63
|
+
if (std::abs(dx[i]) > dmax) {
|
64
|
+
imax = i;
|
65
|
+
dmax = std::abs(dx[i]);
|
66
|
+
}
|
67
|
+
}
|
68
|
+
|
69
|
+
} else { // if incrementing by more than 1
|
70
|
+
|
71
|
+
dmax = std::abs(dx[0]);
|
72
|
+
|
73
|
+
for (size_t i = 1, ix = incx; i < n; ++i, ix += incx) {
|
74
|
+
if (std::abs(dx[ix]) > dmax) {
|
75
|
+
imax = i;
|
76
|
+
dmax = std::abs(dx[ix]);
|
77
|
+
}
|
78
|
+
}
|
79
|
+
}
|
80
|
+
return imax;
|
81
|
+
} /* idamax_ */
|
82
|
+
|
83
|
+
}} // end of namespace nm::math
|
84
|
+
|
85
|
+
#endif
|
86
|
+
|
@@ -328,93 +328,7 @@ inline int iparmq(int ispec, int ilo, int ihi) {
|
|
328
328
|
|
329
329
|
/* ===================================================================== */
|
330
330
|
|
331
|
-
template <typename DType>
|
332
|
-
inline int ger(int m, int n, DType alpha, DType* x, int incx, DType* y, int incy, DType* a, int lda) {
|
333
|
-
|
334
|
-
// FIXME: Call BLAS ger if available
|
335
|
-
|
336
|
-
if (m < 0) {
|
337
|
-
return 1;
|
338
|
-
} else if (n < 0) {
|
339
|
-
return 2;
|
340
|
-
} else if (incx == 0) {
|
341
|
-
return 5;
|
342
|
-
} else if (incy == 0) {
|
343
|
-
return 7;
|
344
|
-
} else if (lda < std::max(1,m)) {
|
345
|
-
return 9;
|
346
|
-
}
|
347
|
-
|
348
|
-
if (m == 0 || n == 0 || alpha == 0) return 0; /* Quick return if possible. */
|
349
|
-
|
350
|
-
/* Start the operations. In this version the elements of A are */
|
351
|
-
/* accessed sequentially with one pass through A. */
|
352
|
-
|
353
|
-
// FIXME: These have been unrolled in a way that the compiler can handle. Collapse into a single case, or optimize
|
354
|
-
// FIXME: in a more modern way.
|
355
|
-
|
356
|
-
int jy = incy > 0 ? 0 : -(n-1) * incy;
|
357
|
-
|
358
|
-
if (incx == 1) {
|
359
|
-
|
360
|
-
for (size_t j = 0; j < n; ++j, jy += incy) {
|
361
|
-
if (y[jy] != 0) {
|
362
|
-
DType temp = alpha * y[jy];
|
363
|
-
for (size_t i = 0; i < m; ++i) {
|
364
|
-
a[i + j * lda] += x[i] * temp;
|
365
|
-
}
|
366
|
-
}
|
367
|
-
}
|
368
|
-
|
369
|
-
} else {
|
370
|
-
|
371
|
-
int kx = incx > 0 ? 0 : -(m-1) * incx;
|
372
|
-
|
373
|
-
for (size_t j = 0; j < n; ++j, jy += incy) {
|
374
|
-
if (y[jy] != 0) {
|
375
|
-
DType temp = alpha * y[jy];
|
376
|
-
|
377
|
-
for (size_t i = 0, ix = kx; i < m; ++i, ix += incx) {
|
378
|
-
a[i + j * lda] += x[ix] * temp;
|
379
|
-
}
|
380
|
-
}
|
381
|
-
}
|
382
|
-
|
383
|
-
}
|
384
|
-
|
385
|
-
return 0;
|
386
|
-
|
387
|
-
/* End of DGER . */
|
388
331
|
|
389
|
-
} /* dger_ */
|
390
|
-
|
391
|
-
|
392
|
-
/* Purpose */
|
393
|
-
/* ======= */
|
394
|
-
|
395
|
-
/* DSCAL scales a vector by a constant. */
|
396
|
-
/* uses unrolled loops for increment equal to one. */
|
397
|
-
|
398
|
-
/* Further Details */
|
399
|
-
/* =============== */
|
400
|
-
|
401
|
-
/* jack dongarra, linpack, 3/11/78. */
|
402
|
-
/* modified 3/93 to return if incx .le. 0. */
|
403
|
-
/* modified 12/3/93, array(1) declarations changed to array(*) */
|
404
|
-
|
405
|
-
/* ===================================================================== */
|
406
|
-
|
407
|
-
template <typename DType>
|
408
|
-
inline void scal(const int n, const DType da, DType* dx, const int incx) {
|
409
|
-
|
410
|
-
// This used to have unrolled loops, like dswap. They were in the way.
|
411
|
-
|
412
|
-
if (n <= 0 || incx <= 0) return;
|
413
|
-
|
414
|
-
for (int i = 0; incx < 0 ? i > n*incx : i < n*incx; i += incx) {
|
415
|
-
dx[i] = da * dx[i];
|
416
|
-
}
|
417
|
-
} /* scal */
|
418
332
|
|
419
333
|
|
420
334
|
/* Purpose */
|
@@ -431,118 +345,6 @@ inline void scal(const int n, const DType da, DType* dx, const int incx) {
|
|
431
345
|
|
432
346
|
/* ===================================================================== */
|
433
347
|
// Formerly dswap
|
434
|
-
template <typename DType>
|
435
|
-
inline void swap(int n, DType *dx, int incx, DType *dy, int incy) {
|
436
|
-
|
437
|
-
/* Function Body */
|
438
|
-
if (n <= 0) return;
|
439
|
-
|
440
|
-
/*
|
441
|
-
* The NETLIB version of dswap has loops manually unrolled, per commented code below.
|
442
|
-
* This doesn't make sense with modern compilers, which know much more about arch-
|
443
|
-
* itectures than we do. Combine that with our use of templates, and it's much more
|
444
|
-
* efficient to let the compiler do the unrolling in most cases.
|
445
|
-
*/
|
446
|
-
/*
|
447
|
-
if (incx == 1 && incy == 1) { // if both increments are 1
|
448
|
-
|
449
|
-
|
450
|
-
m = n % 3;
|
451
|
-
if (m) {
|
452
|
-
for (size_t i = 0; i < m; ++i) { // If number is not divisible by three, swap just one or two singly.
|
453
|
-
dtemp = dx[i];
|
454
|
-
dx[i] = dy[i];
|
455
|
-
dy[i] = dtemp;
|
456
|
-
}
|
457
|
-
if (n < 3) return;
|
458
|
-
}
|
459
|
-
|
460
|
-
for (i = m; i < n; i += 3) { // Why does it swap three at a time? -- John 8/27/12
|
461
|
-
DType dtemp = dx[i];
|
462
|
-
dx[i] = dy[i];
|
463
|
-
dy[i] = dtemp;
|
464
|
-
|
465
|
-
dtemp = dx[i + 1];
|
466
|
-
dx[i + 1] = dy[i + 1];
|
467
|
-
dy[i + 1] = dtemp;
|
468
|
-
|
469
|
-
dtemp = dx[i + 2];
|
470
|
-
dx[i + 2] = dy[i + 2];
|
471
|
-
dy[i + 2] = dtemp;
|
472
|
-
}
|
473
|
-
|
474
|
-
} else { // when any increment is not 1
|
475
|
-
*/
|
476
|
-
|
477
|
-
// For negative increments, start at the end of the array.
|
478
|
-
int ix = incx < 0 ? (-n+1)*incx : 0,
|
479
|
-
iy = incy < 0 ? (-n+1)*incy : 0;
|
480
|
-
|
481
|
-
if (incx < 0) ix = (-n + 1) * incx;
|
482
|
-
if (incy < 0) iy = (-n + 1) * incy;
|
483
|
-
|
484
|
-
for (size_t i = 0; i < n; ++i, ix += incx, iy += incy) {
|
485
|
-
DType dtemp = dx[ix];
|
486
|
-
dx[ix] = dy[iy];
|
487
|
-
dy[iy] = dtemp;
|
488
|
-
}
|
489
|
-
/*} */
|
490
|
-
return;
|
491
|
-
} /* dswap */
|
492
|
-
|
493
|
-
|
494
|
-
|
495
|
-
|
496
|
-
/* Purpose */
|
497
|
-
/* ======= */
|
498
|
-
|
499
|
-
/* IDAMAX finds the index of element having max. absolute value. */
|
500
|
-
|
501
|
-
/* Further Details */
|
502
|
-
/* =============== */
|
503
|
-
|
504
|
-
/* jack dongarra, linpack, 3/11/78. */
|
505
|
-
/* modified 3/93 to return if incx .le. 0. */
|
506
|
-
/* modified 12/3/93, array(1) declarations changed to array(*) */
|
507
|
-
|
508
|
-
/* ===================================================================== */
|
509
|
-
|
510
|
-
template <typename DType>
|
511
|
-
inline int idamax(size_t n, DType *dx, int incx) {
|
512
|
-
|
513
|
-
/* Function Body */
|
514
|
-
if (n < 1 || incx <= 0) return -1;
|
515
|
-
if (n == 1) return 0;
|
516
|
-
|
517
|
-
DType dmax;
|
518
|
-
size_t imax = 0;
|
519
|
-
|
520
|
-
if (incx == 1) { // if incrementing by 1
|
521
|
-
|
522
|
-
dmax = abs(dx[0]);
|
523
|
-
|
524
|
-
for (size_t i = 1; i < n; ++i) {
|
525
|
-
if (std::abs(dx[i]) > dmax) {
|
526
|
-
imax = i;
|
527
|
-
dmax = std::abs(dx[i]);
|
528
|
-
}
|
529
|
-
}
|
530
|
-
|
531
|
-
} else { // if incrementing by more than 1
|
532
|
-
|
533
|
-
dmax = std::abs(dx[0]);
|
534
|
-
|
535
|
-
for (size_t i = 1, ix = incx; i < n; ++i, ix += incx) {
|
536
|
-
if (std::abs(dx[ix]) > dmax) {
|
537
|
-
imax = i;
|
538
|
-
dmax = std::abs(dx[ix]);
|
539
|
-
}
|
540
|
-
}
|
541
|
-
}
|
542
|
-
return imax;
|
543
|
-
} /* idamax_ */
|
544
|
-
|
545
|
-
|
546
348
|
|
547
349
|
|
548
350
|
|
@@ -995,181 +797,83 @@ inline int ilaenv(int ispec, const std::string& name, int n1, int n2, int n3, in
|
|
995
797
|
|
996
798
|
|
997
799
|
|
998
|
-
/* > \brief \b DGETF2 */
|
999
|
-
|
1000
|
-
/* =========== DOCUMENTATION =========== */
|
1001
|
-
|
1002
|
-
/* Online html documentation available at */
|
1003
|
-
/* http://www.netlib.org/lapack/explore-html/ */
|
1004
|
-
|
1005
|
-
/* > \htmlonly */
|
1006
|
-
/* > Download DGETF2 + dependencies */
|
1007
|
-
/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgetf2.
|
1008
|
-
f"> */
|
1009
|
-
/* > [TGZ]</a> */
|
1010
|
-
/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgetf2.
|
1011
|
-
f"> */
|
1012
|
-
/* > [ZIP]</a> */
|
1013
|
-
/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgetf2.
|
1014
|
-
f"> */
|
1015
|
-
/* > [TXT]</a> */
|
1016
|
-
/* > \endhtmlonly */
|
1017
|
-
|
1018
|
-
/* Definition: */
|
1019
|
-
/* =========== */
|
1020
|
-
|
1021
|
-
/* SUBROUTINE DGETF2( M, N, A, LDA, IPIV, INFO ) */
|
1022
|
-
|
1023
|
-
/* .. Scalar Arguments .. */
|
1024
|
-
/* INTEGER INFO, LDA, M, N */
|
1025
|
-
/* .. */
|
1026
|
-
/* .. Array Arguments .. */
|
1027
|
-
/* INTEGER IPIV( * ) */
|
1028
|
-
/* DOUBLE PRECISION A( LDA, * ) */
|
1029
|
-
/* .. */
|
1030
|
-
|
1031
|
-
|
1032
|
-
/* > \par Purpose: */
|
1033
|
-
/* ============= */
|
1034
|
-
/* > */
|
1035
|
-
/* > \verbatim */
|
1036
|
-
/* > */
|
1037
|
-
/* > DGETF2 computes an LU factorization of a general m-by-n matrix A */
|
1038
|
-
/* > using partial pivoting with row interchanges. */
|
1039
|
-
/* > */
|
1040
|
-
/* > The factorization has the form */
|
1041
|
-
/* > A = P * L * U */
|
1042
|
-
/* > where P is a permutation matrix, L is lower triangular with unit */
|
1043
|
-
/* > diagonal elements (lower trapezoidal if m > n), and U is upper */
|
1044
|
-
/* > triangular (upper trapezoidal if m < n). */
|
1045
|
-
/* > */
|
1046
|
-
/* > This is the right-looking Level 2 BLAS version of the algorithm. */
|
1047
|
-
/* > \endverbatim */
|
1048
|
-
|
1049
|
-
/* Arguments: */
|
1050
|
-
/* ========== */
|
1051
|
-
|
1052
|
-
/* > \param[in] M */
|
1053
|
-
/* > \verbatim */
|
1054
|
-
/* > M is INTEGER */
|
1055
|
-
/* > The number of rows of the matrix A. M >= 0. */
|
1056
|
-
/* > \endverbatim */
|
1057
|
-
/* > */
|
1058
|
-
/* > \param[in] N */
|
1059
|
-
/* > \verbatim */
|
1060
|
-
/* > N is INTEGER */
|
1061
|
-
/* > The number of columns of the matrix A. N >= 0. */
|
1062
|
-
/* > \endverbatim */
|
1063
|
-
/* > */
|
1064
|
-
/* > \param[in,out] A */
|
1065
|
-
/* > \verbatim */
|
1066
|
-
/* > A is DOUBLE PRECISION array, dimension (LDA,N) */
|
1067
|
-
/* > On entry, the m by n matrix to be factored. */
|
1068
|
-
/* > On exit, the factors L and U from the factorization */
|
1069
|
-
/* > A = P*L*U; the unit diagonal elements of L are not stored. */
|
1070
|
-
/* > \endverbatim */
|
1071
|
-
/* > */
|
1072
|
-
/* > \param[in] LDA */
|
1073
|
-
/* > \verbatim */
|
1074
|
-
/* > LDA is INTEGER */
|
1075
|
-
/* > The leading dimension of the array A. LDA >= max(1,M). */
|
1076
|
-
/* > \endverbatim */
|
1077
|
-
/* > */
|
1078
|
-
/* > \param[out] IPIV */
|
1079
|
-
/* > \verbatim */
|
1080
|
-
/* > IPIV is INTEGER array, dimension (min(M,N)) */
|
1081
|
-
/* > The pivot indices; for 1 <= i <= min(M,N), row i of the */
|
1082
|
-
/* > matrix was interchanged with row IPIV(i). */
|
1083
|
-
/* > \endverbatim */
|
1084
|
-
/* > */
|
1085
|
-
/* > \param[out] INFO */
|
1086
|
-
/* > \verbatim */
|
1087
|
-
/* > INFO is INTEGER */
|
1088
|
-
/* > = 0: successful exit */
|
1089
|
-
/* > < 0: if INFO = -k, the k-th argument had an illegal value */
|
1090
|
-
/* > > 0: if INFO = k, U(k,k) is exactly zero. The factorization */
|
1091
|
-
/* > has been completed, but the factor U is exactly */
|
1092
|
-
/* > singular, and division by zero will occur if it is used */
|
1093
|
-
/* > to solve a system of equations. */
|
1094
|
-
/* > \endverbatim */
|
1095
|
-
|
1096
|
-
/* Authors: */
|
1097
|
-
/* ======== */
|
1098
|
-
|
1099
|
-
/* > \author Univ. of Tennessee */
|
1100
|
-
/* > \author Univ. of California Berkeley */
|
1101
|
-
/* > \author Univ. of Colorado Denver */
|
1102
|
-
/* > \author NAG Ltd. */
|
1103
|
-
|
1104
|
-
/* > \date November 2011 */
|
1105
|
-
|
1106
|
-
/* > \ingroup doubleGEcomputational */
|
1107
|
-
|
1108
|
-
/* ===================================================================== */
|
1109
|
-
|
1110
800
|
template <typename DType>
|
1111
|
-
inline int
|
1112
|
-
|
1113
|
-
|
1114
|
-
if (m < 0) return -1; // error
|
1115
|
-
else if (n < 0) return -2; // error
|
1116
|
-
else if (lda < std::max(1,m)) return -4; // error
|
1117
|
-
|
1118
|
-
|
1119
|
-
if (m == 0 || n == 0) return 0; /* Quick return if possible */
|
801
|
+
inline int lsame(char const *ca, char const *cb, int ca_len, int cb_len)
|
802
|
+
{
|
803
|
+
int ret_val;
|
1120
804
|
|
1121
|
-
|
1122
|
-
|
1123
|
-
|
1124
|
-
|
1125
|
-
int jp = j - 1 + idamax<DType>(m-j+1, &a[j + j * lda], 1);
|
1126
|
-
|
1127
|
-
ipiv[j] = jp;
|
805
|
+
int static inta, intb, zcode;
|
806
|
+
/* -- LAPACK auxiliary routine (version 3.2) -- */
|
807
|
+
/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
|
808
|
+
/* November 2006 */
|
1128
809
|
|
810
|
+
/* .. Scalar Arguments .. */
|
811
|
+
/* .. */
|
1129
812
|
|
1130
|
-
|
813
|
+
/* Purpose */
|
814
|
+
/* ======= */
|
1131
815
|
|
1132
|
-
|
1133
|
-
|
1134
|
-
if (jp != j) swap<DType>(n, &a[j], lda, &a[jp], lda);
|
816
|
+
/* LSAME returns .TRUE. if CA is the same letter as CB regardless of */
|
817
|
+
/* case. */
|
1135
818
|
|
1136
|
-
|
819
|
+
/* Arguments */
|
820
|
+
/* ========= */
|
1137
821
|
|
1138
|
-
|
1139
|
-
|
1140
|
-
|
1141
|
-
} else {
|
1142
|
-
for (size_t i = 0; i < m-j; ++i) { // changed
|
1143
|
-
a[j+i+j*lda] /= a[j+j*lda];
|
1144
|
-
}
|
1145
|
-
}
|
1146
|
-
}
|
822
|
+
/* CA (input) CHARACTER*1 */
|
823
|
+
/* CB (input) CHARACTER*1 */
|
824
|
+
/* CA and CB specify the single characters to be compared. */
|
1147
825
|
|
1148
|
-
|
1149
|
-
|
826
|
+
/* ===================================================================== */
|
827
|
+
inta = *(unsigned char *)ca;
|
828
|
+
intb = *(unsigned char *)cb;
|
829
|
+
ret_val = inta == intb;
|
830
|
+
if (ret_val) {
|
831
|
+
return ret_val;
|
832
|
+
}
|
833
|
+
|
834
|
+
zcode = (int)"Z";
|
835
|
+
|
836
|
+
if (zcode == 90 || zcode == 122 ) {
|
837
|
+
/* ASCII is assumed - ZCODE is the ASCII code of either lower or */
|
838
|
+
/* upper case 'Z'. */
|
839
|
+
if (inta >=97 && inta <= 122) {
|
840
|
+
inta += -32;
|
841
|
+
}
|
842
|
+
if (intb >= 97 && intb <= 122) {
|
843
|
+
intb += -32;
|
844
|
+
}
|
845
|
+
} else if (zcode == 233 || zcode == 169) {
|
846
|
+
/* EBCDIC is assumed - ZCODE is the EBCDIC code of either lower or */
|
847
|
+
/* upper case 'Z'. */
|
848
|
+
if (inta >= 129 && inta <= 137 || inta >= 145 && inta <= 153 || inta >= 162 && inta <= 169) {
|
849
|
+
inta += 64;
|
850
|
+
}
|
851
|
+
if (intb >= 129 && intb <= 137 || intb >= 145 && intb <= 153 || intb >= 162 && intb <= 169) {
|
852
|
+
intb += 64;
|
1150
853
|
}
|
854
|
+
} else if (zcode = 218 || zcode == 250) {
|
855
|
+
/* ASCII is assumed, on Prime machines - ZCODE is the ASCII code */
|
856
|
+
/* plus 128 of either lower or upper case 'Z'. */
|
857
|
+
if (inta >= 225 && inta <= 250) {
|
858
|
+
inta += -32;
|
859
|
+
}
|
860
|
+
if (intb >= 225 && intb <= 250) {
|
861
|
+
intb += -32;
|
862
|
+
}
|
863
|
+
}
|
864
|
+
ret_val = inta == intb;
|
1151
865
|
|
1152
|
-
|
1153
|
-
|
866
|
+
return ret_val;
|
867
|
+
}
|
1154
868
|
|
1155
|
-
}
|
1156
|
-
return 0;
|
1157
|
-
} /* dgetf2_ */
|
1158
869
|
|
1159
870
|
|
1160
871
|
|
1161
872
|
} // end namespace lapack
|
1162
873
|
|
1163
|
-
|
1164
|
-
* Function signature conversion for LAPACK's scal function.
|
1165
|
-
*/
|
1166
|
-
template <typename DType>
|
1167
|
-
inline void clapack_scal(const int n, const void* da, void* dx, const int incx) {
|
1168
|
-
// FIXME: See if we can call the clapack version instead of our C++ version.
|
1169
|
-
nm::math::lapack::scal<DType>(n, *reinterpret_cast<const DType*>(da), reinterpret_cast<DType*>(dx), incx);
|
1170
|
-
}
|
874
|
+
|
1171
875
|
|
1172
876
|
|
1173
877
|
}}
|
1174
878
|
|
1175
|
-
#endif
|
879
|
+
#endif
|