nmatrix 0.0.6 → 0.0.7

Files changed (67)
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -0
  3. data/Gemfile +5 -0
  4. data/History.txt +97 -0
  5. data/Manifest.txt +34 -7
  6. data/README.rdoc +13 -13
  7. data/Rakefile +36 -26
  8. data/ext/nmatrix/data/data.cpp +15 -2
  9. data/ext/nmatrix/data/data.h +4 -0
  10. data/ext/nmatrix/data/ruby_object.h +5 -14
  11. data/ext/nmatrix/extconf.rb +3 -2
  12. data/ext/nmatrix/{util/math.cpp → math.cpp} +296 -6
  13. data/ext/nmatrix/math/asum.h +143 -0
  14. data/ext/nmatrix/math/geev.h +82 -0
  15. data/ext/nmatrix/math/gemm.h +267 -0
  16. data/ext/nmatrix/math/gemv.h +208 -0
  17. data/ext/nmatrix/math/ger.h +96 -0
  18. data/ext/nmatrix/math/gesdd.h +80 -0
  19. data/ext/nmatrix/math/gesvd.h +78 -0
  20. data/ext/nmatrix/math/getf2.h +86 -0
  21. data/ext/nmatrix/math/getrf.h +240 -0
  22. data/ext/nmatrix/math/getri.h +107 -0
  23. data/ext/nmatrix/math/getrs.h +125 -0
  24. data/ext/nmatrix/math/idamax.h +86 -0
  25. data/ext/nmatrix/{util → math}/lapack.h +60 -356
  26. data/ext/nmatrix/math/laswp.h +165 -0
  27. data/ext/nmatrix/math/long_dtype.h +52 -0
  28. data/ext/nmatrix/math/math.h +1154 -0
  29. data/ext/nmatrix/math/nrm2.h +181 -0
  30. data/ext/nmatrix/math/potrs.h +125 -0
  31. data/ext/nmatrix/math/rot.h +141 -0
  32. data/ext/nmatrix/math/rotg.h +115 -0
  33. data/ext/nmatrix/math/scal.h +73 -0
  34. data/ext/nmatrix/math/swap.h +73 -0
  35. data/ext/nmatrix/math/trsm.h +383 -0
  36. data/ext/nmatrix/nmatrix.cpp +176 -152
  37. data/ext/nmatrix/nmatrix.h +1 -2
  38. data/ext/nmatrix/ruby_constants.cpp +9 -4
  39. data/ext/nmatrix/ruby_constants.h +1 -0
  40. data/ext/nmatrix/storage/dense.cpp +57 -41
  41. data/ext/nmatrix/storage/list.cpp +52 -50
  42. data/ext/nmatrix/storage/storage.cpp +59 -43
  43. data/ext/nmatrix/storage/yale.cpp +352 -333
  44. data/ext/nmatrix/storage/yale.h +4 -0
  45. data/lib/nmatrix.rb +2 -2
  46. data/lib/nmatrix/blas.rb +4 -4
  47. data/lib/nmatrix/enumerate.rb +241 -0
  48. data/lib/nmatrix/lapack.rb +54 -1
  49. data/lib/nmatrix/math.rb +462 -0
  50. data/lib/nmatrix/nmatrix.rb +210 -486
  51. data/lib/nmatrix/nvector.rb +0 -62
  52. data/lib/nmatrix/rspec.rb +75 -0
  53. data/lib/nmatrix/shortcuts.rb +136 -108
  54. data/lib/nmatrix/version.rb +1 -1
  55. data/spec/blas_spec.rb +20 -12
  56. data/spec/elementwise_spec.rb +22 -13
  57. data/spec/io_spec.rb +1 -0
  58. data/spec/lapack_spec.rb +197 -0
  59. data/spec/nmatrix_spec.rb +39 -38
  60. data/spec/nvector_spec.rb +3 -9
  61. data/spec/rspec_monkeys.rb +29 -0
  62. data/spec/rspec_spec.rb +34 -0
  63. data/spec/shortcuts_spec.rb +14 -16
  64. data/spec/slice_spec.rb +242 -186
  65. data/spec/spec_helper.rb +19 -0
  66. metadata +33 -5
  67. data/ext/nmatrix/util/math.h +0 -2612
data/ext/nmatrix/math/ger.h
@@ -0,0 +1,96 @@
+ /////////////////////////////////////////////////////////////////////
+ // = NMatrix
+ //
+ // A linear algebra library for scientific computation in Ruby.
+ // NMatrix is part of SciRuby.
+ //
+ // NMatrix was originally inspired by and derived from NArray, by
+ // Masahiro Tanaka: http://narray.rubyforge.org
+ //
+ // == Copyright Information
+ //
+ // SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
+ // NMatrix is Copyright (c) 2013, Ruby Science Foundation
+ //
+ // Please see LICENSE.txt for additional copyright notices.
+ //
+ // == Contributing
+ //
+ // By contributing source code to SciRuby, you agree to be bound by
+ // our Contributor Agreement:
+ //
+ // * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement
+ //
+ // == ger.h
+ //
+ // BLAS level-2 ger function in native C++.
+ //
+
+ #ifndef GER_H
+ #define GER_H
+
+ namespace nm { namespace math {
+
+ template <typename DType>
+ inline int ger(int m, int n, DType alpha, DType* x, int incx, DType* y, int incy, DType* a, int lda) {
+
+   // FIXME: Call BLAS ger if available
+
+   if (m < 0) {
+     return 1;
+   } else if (n < 0) {
+     return 2;
+   } else if (incx == 0) {
+     return 5;
+   } else if (incy == 0) {
+     return 7;
+   } else if (lda < std::max(1,m)) {
+     return 9;
+   }
+
+   if (m == 0 || n == 0 || alpha == 0) return 0; /* Quick return if possible. */
+
+   /* Start the operations. In this version the elements of A are */
+   /* accessed sequentially with one pass through A. */
+
+   // FIXME: These have been unrolled in a way that the compiler can handle. Collapse into a single case, or optimize
+   // FIXME: in a more modern way.
+
+   int jy = incy > 0 ? 0 : -(n-1) * incy;
+
+   if (incx == 1) {
+
+     for (size_t j = 0; j < n; ++j, jy += incy) {
+       if (y[jy] != 0) {
+         DType temp = alpha * y[jy];
+         for (size_t i = 0; i < m; ++i) {
+           a[i + j * lda] += x[i] * temp;
+         }
+       }
+     }
+
+   } else {
+
+     int kx = incx > 0 ? 0 : -(m-1) * incx;
+
+     for (size_t j = 0; j < n; ++j, jy += incy) {
+       if (y[jy] != 0) {
+         DType temp = alpha * y[jy];
+
+         for (size_t i = 0, ix = kx; i < m; ++i, ix += incx) {
+           a[i + j * lda] += x[ix] * temp;
+         }
+       }
+     }
+
+   }
+
+   return 0;
+
+   /* End of DGER . */
+
+ } /* dger_ */
+
+ }} // end nm::math
+
+ #endif // GER_H
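
Note: ger performs the BLAS rank-1 update A := alpha * x * y^T + A on a column-major m-by-n matrix with leading dimension lda. The following standalone sketch (not part of the gem; the helper name ger_sketch and the 3x2 data are purely illustrative) mirrors the incx == 1 branch above:

#include <cstdio>

// Rank-1 update of a column-major m x n matrix with leading dimension lda:
// A := alpha * x * y^T + A (unit strides, as in the incx == 1 path above).
void ger_sketch(int m, int n, double alpha,
                const double* x, const double* y, double* a, int lda) {
  for (int j = 0; j < n; ++j) {
    if (y[j] != 0) {
      double temp = alpha * y[j];
      for (int i = 0; i < m; ++i)
        a[i + j * lda] += x[i] * temp;
    }
  }
}

int main() {
  double a[6] = {0, 0, 0,  0, 0, 0};  // 3x2 zero matrix, column-major, lda = 3
  double x[3] = {1, 2, 3};
  double y[2] = {4, 5};
  ger_sketch(3, 2, 1.0, x, y, a, 3);
  // A(i,j) should now equal x[i] * y[j]: column 0 = {4, 8, 12}, column 1 = {5, 10, 15}.
  for (int j = 0; j < 2; ++j)
    for (int i = 0; i < 3; ++i)
      std::printf("A(%d,%d) = %g\n", i, j, a[i + j * 3]);
  return 0;
}
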
data/ext/nmatrix/math/gesdd.h
@@ -0,0 +1,80 @@
+ /////////////////////////////////////////////////////////////////////
+ // = NMatrix
+ //
+ // A linear algebra library for scientific computation in Ruby.
+ // NMatrix is part of SciRuby.
+ //
+ // NMatrix was originally inspired by and derived from NArray, by
+ // Masahiro Tanaka: http://narray.rubyforge.org
+ //
+ // == Copyright Information
+ //
+ // SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
+ // NMatrix is Copyright (c) 2013, Ruby Science Foundation
+ //
+ // Please see LICENSE.txt for additional copyright notices.
+ //
+ // == Contributing
+ //
+ // By contributing source code to SciRuby, you agree to be bound by
+ // our Contributor Agreement:
+ //
+ // * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement
+ //
+ // == gesdd.h
+ //
+ // Header file for interface with LAPACK's xGESDD functions.
+ //
+
+ #ifndef GESDD_H
+ # define GESDD_H
+
+ extern "C" {
+
+   void sgesdd_(char*, int*, int*, float*, int*, float*, float*, int*, float*, int*, float*, int*, int*, int*);
+   void dgesdd_(char*, int*, int*, double*, int*, double*, double*, int*, double*, int*, double*, int*, int*, int*);
+   void cgesdd_(char*, int*, int*, nm::Complex64*, int*, nm::Complex64*, nm::Complex64*, int*, nm::Complex64*, int*, nm::Complex64*, int*, float*, int*, int*);
+   void zgesdd_(char*, int*, int*, nm::Complex128*, int*, nm::Complex128*, nm::Complex128*, int*, nm::Complex128*, int*, nm::Complex128*, int*, double*, int*, int*);
+ }
+
+ namespace nm {
+ namespace math {
+
+ template <typename DType, typename CType>
+ inline int gesdd(char jobz, int m, int n, DType* a, int lda, DType* s, DType* u, int ldu, DType* vt, int ldvt, DType* work, int lwork, int* iwork, CType* rwork) {
+   rb_raise(rb_eNotImpError, "not yet implemented for non-BLAS dtypes");
+   return -1;
+ }
+
+ template <>
+ inline int gesdd(char jobz, int m, int n, float* a, int lda, float* s, float* u, int ldu, float* vt, int ldvt, float* work, int lwork, int* iwork, float* rwork) {
+   int info;
+   sgesdd_(&jobz, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, work, &lwork, iwork, &info);
+   return info;
+ }
+
+ template <>
+ inline int gesdd(char jobz, int m, int n, double* a, int lda, double* s, double* u, int ldu, double* vt, int ldvt, double* work, int lwork, int* iwork, double* rwork) {
+   int info;
+   dgesdd_(&jobz, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, work, &lwork, iwork, &info);
+   return info;
+ }
+
+ template <>
+ inline int gesdd(char jobz, int m, int n, nm::Complex64* a, int lda, nm::Complex64* s, nm::Complex64* u, int ldu, nm::Complex64* vt, int ldvt, nm::Complex64* work, int lwork, int* iwork, float* rwork) {
+   int info;
+   cgesdd_(&jobz, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, work, &lwork, rwork, iwork, &info);
+   return info;
+ }
+
+ template <>
+ inline int gesdd(char jobz, int m, int n, nm::Complex128* a, int lda, nm::Complex128* s, nm::Complex128* u, int ldu, nm::Complex128* vt, int ldvt, nm::Complex128* work, int lwork, int* iwork, double* rwork) {
+   int info;
+   zgesdd_(&jobz, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, work, &lwork, rwork, iwork, &info);
+   return info;
+ }
+
+ } // end of namespace math
+ } // end of namespace nm
+
+ #endif // GESDD_H
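
Note: these specializations are thin pass-throughs to Fortran LAPACK's divide-and-conquer SVD; the real-valued variants simply ignore rwork. A minimal sketch of how the underlying dgesdd_ routine is driven (not part of the gem; assumes a LAPACK library is linked, e.g. -llapack), using the customary lwork = -1 workspace query:

#include <cstdio>
#include <vector>

// Fortran LAPACK divide-and-conquer SVD; prototype matches the dgesdd_ declaration above.
extern "C" void dgesdd_(char* jobz, int* m, int* n, double* a, int* lda,
                        double* s, double* u, int* ldu, double* vt, int* ldvt,
                        double* work, int* lwork, int* iwork, int* info);

int main() {
  char jobz = 'A';               // compute all m columns of U and all n rows of V^T
  int m = 2, n = 2, lda = 2, ldu = 2, ldvt = 2, info;
  double a[4] = {3, 0,  0, 4};   // column-major 2x2 diag(3, 4); overwritten by LAPACK
  double s[2], u[4], vt[4];
  int iwork[16];                 // 8 * min(m, n)

  int lwork = -1;                // workspace query: optimal size is returned in wkopt
  double wkopt;
  dgesdd_(&jobz, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, &wkopt, &lwork, iwork, &info);

  lwork = static_cast<int>(wkopt);
  std::vector<double> work(lwork);
  dgesdd_(&jobz, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, work.data(), &lwork, iwork, &info);

  std::printf("info = %d, singular values: %g %g\n", info, s[0], s[1]);  // expect 4 and 3
  return 0;
}
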
data/ext/nmatrix/math/gesvd.h
@@ -0,0 +1,78 @@
+ /////////////////////////////////////////////////////////////////////
+ // = NMatrix
+ //
+ // A linear algebra library for scientific computation in Ruby.
+ // NMatrix is part of SciRuby.
+ //
+ // NMatrix was originally inspired by and derived from NArray, by
+ // Masahiro Tanaka: http://narray.rubyforge.org
+ //
+ // == Copyright Information
+ //
+ // SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
+ // NMatrix is Copyright (c) 2013, Ruby Science Foundation
+ //
+ // Please see LICENSE.txt for additional copyright notices.
+ //
+ // == Contributing
+ //
+ // By contributing source code to SciRuby, you agree to be bound by
+ // our Contributor Agreement:
+ //
+ // * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement
+ //
+ // == gesvd.h
+ //
+ // Header file for interface with LAPACK's xGESVD functions.
+ //
+
+ #ifndef GESVD_H
+ # define GESVD_H
+
+ extern "C" {
+   void sgesvd_(char*, char*, int*, int*, float*, int*, float*, float*, int*, float*, int*, float*, int*, int*);
+   void dgesvd_(char*, char*, int*, int*, double*, int*, double*, double*, int*, double*, int*, double*, int*, int*);
+   void cgesvd_(char*, char*, int*, int*, nm::Complex64*, int*, nm::Complex64*, nm::Complex64*, int*, nm::Complex64*, int*, nm::Complex64*, int*, float*, int*);
+   void zgesvd_(char*, char*, int*, int*, nm::Complex128*, int*, nm::Complex128*, nm::Complex128*, int*, nm::Complex128*, int*, nm::Complex128*, int*, double*, int*);
+ }
+
+ namespace nm {
+ namespace math {
+
+ template <typename DType, typename CType>
+ inline int gesvd(char jobu, char jobvt, int m, int n, DType* a, int lda, DType* s, DType* u, int ldu, DType* vt, int ldvt, DType* work, int lwork, CType* rwork) {
+   rb_raise(rb_eNotImpError, "not yet implemented for non-BLAS dtypes");
+   return -1;
+ }
+
+ template <>
+ inline int gesvd(char jobu, char jobvt, int m, int n, float* a, int lda, float* s, float* u, int ldu, float* vt, int ldvt, float* work, int lwork, float* rwork) {
+   int info;
+   sgesvd_(&jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, work, &lwork, &info);
+   return info;
+ }
+
+ template <>
+ inline int gesvd(char jobu, char jobvt, int m, int n, double* a, int lda, double* s, double* u, int ldu, double* vt, int ldvt, double* work, int lwork, double* rwork) {
+   int info;
+   dgesvd_(&jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, work, &lwork, &info);
+   return info;
+ }
+
+ template <>
+ inline int gesvd(char jobu, char jobvt, int m, int n, nm::Complex64* a, int lda, nm::Complex64* s, nm::Complex64* u, int ldu, nm::Complex64* vt, int ldvt, nm::Complex64* work, int lwork, float* rwork) {
+   int info;
+   cgesvd_(&jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, work, &lwork, rwork, &info);
+   return info;
+ }
+
+ template <>
+ inline int gesvd(char jobu, char jobvt, int m, int n, nm::Complex128* a, int lda, nm::Complex128* s, nm::Complex128* u, int ldu, nm::Complex128* vt, int ldvt, nm::Complex128* work, int lwork, double* rwork) {
+   int info;
+   zgesvd_(&jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, work, &lwork, rwork, &info);
+   return info;
+ }
+
+ } // end of namespace math
+ } // end of namespace nm
+ #endif // GESVD_H
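
Note: gesvd is the QR-algorithm SVD, whereas gesdd above is the divide-and-conquer variant; it takes separate jobu/jobvt flags instead of a single jobz and needs no iwork. A minimal sketch of a thin SVD of a rectangular matrix via the underlying dgesvd_ (not part of the gem; assumes -llapack; the 2x3 data are illustrative):

#include <cstdio>
#include <vector>

// Fortran LAPACK QR-algorithm SVD; prototype matches the dgesvd_ declaration above.
extern "C" void dgesvd_(char* jobu, char* jobvt, int* m, int* n, double* a, int* lda,
                        double* s, double* u, int* ldu, double* vt, int* ldvt,
                        double* work, int* lwork, int* info);

int main() {
  char jobu = 'S', jobvt = 'S';        // thin SVD: U is m x min(m,n), V^T is min(m,n) x n
  int m = 2, n = 3, lda = 2, ldu = 2, ldvt = 2, info;
  double a[6] = {1, 0,  0, 2,  0, 0};  // column-major 2x3 [[1,0,0],[0,2,0]]; overwritten
  double s[2], u[4], vt[6];

  int lwork = -1;                      // workspace query
  double wkopt;
  dgesvd_(&jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, &wkopt, &lwork, &info);

  lwork = static_cast<int>(wkopt);
  std::vector<double> work(lwork);
  dgesvd_(&jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, work.data(), &lwork, &info);

  std::printf("info = %d, singular values: %g %g\n", info, s[0], s[1]);  // expect 2 and 1
  return 0;
}
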
data/ext/nmatrix/math/getf2.h
@@ -0,0 +1,86 @@
+ /////////////////////////////////////////////////////////////////////
+ // = NMatrix
+ //
+ // A linear algebra library for scientific computation in Ruby.
+ // NMatrix is part of SciRuby.
+ //
+ // NMatrix was originally inspired by and derived from NArray, by
+ // Masahiro Tanaka: http://narray.rubyforge.org
+ //
+ // == Copyright Information
+ //
+ // SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
+ // NMatrix is Copyright (c) 2013, Ruby Science Foundation
+ //
+ // Please see LICENSE.txt for additional copyright notices.
+ //
+ // == Contributing
+ //
+ // By contributing source code to SciRuby, you agree to be bound by
+ // our Contributor Agreement:
+ //
+ // * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement
+ //
+ // == getf2.h
+ //
+ // LAPACK getf2 function in native C++.
+ //
+
+ #ifndef GETF2_H
+ #define GETF2_H
+
+ namespace nm { namespace math {
+
+ template <typename DType>
+ inline int getf2(const int m, const int n, DType* a, const int lda, int *ipiv) {
+
+   /* Function Body */
+   if (m < 0) return -1; // error
+   else if (n < 0) return -2; // error
+   else if (lda < std::max(1,m)) return -4; // error
+
+
+   if (m == 0 || n == 0) return 0; /* Quick return if possible */
+
+   for (size_t j = 0; j < std::min(m,n); ++j) { // changed
+
+     /* Find pivot and test for singularity. */
+
+     int jp = j - 1 + idamax<DType>(m-j+1, &a[j + j * lda], 1);
+
+     ipiv[j] = jp;
+
+
+     if (a[jp + j*lda] != 0) {
+
+       /* Apply the interchange to columns 1:N. */
+       // (Don't swap two columns that are the same.)
+       if (jp != j) swap<DType>(n, &a[j], lda, &a[jp], lda);
+
+       /* Compute elements J+1:M of J-th column. */
+
+       if (j < m-1) {
+         if (std::abs(a[j+j*lda]) >= std::numeric_limits<DType>::min()) {
+           scal<DType>(m-j, 1.0 / a[j+j*lda], &a[j+1+j*lda], 1);
+         } else {
+           for (size_t i = 0; i < m-j; ++i) { // changed
+             a[j+i+j*lda] /= a[j+j*lda];
+           }
+         }
+       }
+
+     } else { // singular matrix
+       return j; // U(j,j) is exactly zero, div by zero if answer is used to solve a system of equations.
+     }
+
+     if (j < std::min(m,n)-1) /* Update trailing submatrix. */
+       ger<DType>(m-j, n-j, -1.0, &a[j+1+j*lda], 1, &a[j+(j+1)*lda], lda, &a[j+1+(j+1)*lda], lda);
+
+   }
+   return 0;
+ } /* dgetf2_ */
+
+
+ }} // end of namespace nm::math
+
+ #endif // GETF2
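
Note: getf2 is the unblocked LU factorization with partial pivoting that the blocked getrf falls back on for small panels. As a standalone illustration of what it computes, PA = LU on a column-major matrix with the unit-diagonal L stored below the diagonal, U on and above it, and ipiv recording the row swaps (this sketch is not a transcription of the template above; it uses 0-based pivot indices and omits the singularity check):

#include <algorithm>
#include <cmath>
#include <cstdio>

// Unblocked LU with partial pivoting on a column-major n x n matrix A (leading dimension lda).
// On return: L (unit diagonal, not stored) sits below the diagonal, U on and above it,
// and ipiv[j] is the row swapped into position j at step j.
void lu_sketch(int n, double* a, int lda, int* ipiv) {
  for (int j = 0; j < n; ++j) {
    int p = j;                                   // pivot: largest magnitude at or below the diagonal
    for (int i = j + 1; i < n; ++i)
      if (std::fabs(a[i + j * lda]) > std::fabs(a[p + j * lda])) p = i;
    ipiv[j] = p;
    if (p != j)                                  // interchange rows j and p across all columns
      for (int k = 0; k < n; ++k) std::swap(a[j + k * lda], a[p + k * lda]);
    for (int i = j + 1; i < n; ++i)              // multipliers: column j of L
      a[i + j * lda] /= a[j + j * lda];
    for (int k = j + 1; k < n; ++k)              // rank-1 update of the trailing submatrix
      for (int i = j + 1; i < n; ++i)
        a[i + k * lda] -= a[i + j * lda] * a[j + k * lda];
  }
}

int main() {
  double a[9] = {2, 4, 8,  1, 3, 7,  1, 3, 9};   // column-major 3x3
  int ipiv[3];
  lu_sketch(3, a, 3, ipiv);
  for (int i = 0; i < 3; ++i)                    // print the packed L\U factor row by row
    std::printf("%8.3f %8.3f %8.3f\n", a[i], a[i + 3], a[i + 6]);
  return 0;
}
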
data/ext/nmatrix/math/getrf.h
@@ -0,0 +1,240 @@
+ /////////////////////////////////////////////////////////////////////
+ // = NMatrix
+ //
+ // A linear algebra library for scientific computation in Ruby.
+ // NMatrix is part of SciRuby.
+ //
+ // NMatrix was originally inspired by and derived from NArray, by
+ // Masahiro Tanaka: http://narray.rubyforge.org
+ //
+ // == Copyright Information
+ //
+ // SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
+ // NMatrix is Copyright (c) 2013, Ruby Science Foundation
+ //
+ // Please see LICENSE.txt for additional copyright notices.
+ //
+ // == Contributing
+ //
+ // By contributing source code to SciRuby, you agree to be bound by
+ // our Contributor Agreement:
+ //
+ // * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement
+ //
+ // == getrf.h
+ //
+ // getrf function in native C++.
+ //
+
+ /*
+  * Automatically Tuned Linear Algebra Software v3.8.4
+  * (C) Copyright 1999 R. Clint Whaley
+  *
+  * Redistribution and use in source and binary forms, with or without
+  * modification, are permitted provided that the following conditions
+  * are met:
+  * 1. Redistributions of source code must retain the above copyright
+  *    notice, this list of conditions and the following disclaimer.
+  * 2. Redistributions in binary form must reproduce the above copyright
+  *    notice, this list of conditions, and the following disclaimer in the
+  *    documentation and/or other materials provided with the distribution.
+  * 3. The name of the ATLAS group or the names of its contributers may
+  *    not be used to endorse or promote products derived from this
+  *    software without specific written permission.
+  *
+  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ATLAS GROUP OR ITS CONTRIBUTORS
+  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+  * POSSIBILITY OF SUCH DAMAGE.
+  *
+  */
+
+ #ifndef GETRF_H
+ #define GETRF_H
+
+ namespace nm { namespace math {
+
+ /* Numeric inverse -- usually just 1 / f, but a little more complicated for complex. */
+ template <typename DType>
+ inline DType numeric_inverse(const DType& n) {
+   return n.inverse();
+ }
+ template <> inline float numeric_inverse(const float& n) { return 1 / n; }
+ template <> inline double numeric_inverse(const double& n) { return 1 / n; }
+
+
+
+ /*
+  * Templated version of row-order and column-order getrf, derived from ATL_getrfR.c (from ATLAS 3.8.0).
+  *
+  * 1. Row-major factorization of form
+  *      A = L * U * P
+  *    where P is a column-permutation matrix, L is lower triangular (lower
+  *    trapazoidal if M > N), and U is upper triangular with unit diagonals (upper
+  *    trapazoidal if M < N). This is the recursive Level 3 BLAS version.
+  *
+  * 2. Column-major factorization of form
+  *      A = P * L * U
+  *    where P is a row-permutation matrix, L is lower triangular with unit diagonal
+  *    elements (lower trapazoidal if M > N), and U is upper triangular (upper
+  *    trapazoidal if M < N). This is the recursive Level 3 BLAS version.
+  *
+  * Template argument determines whether 1 or 2 is utilized.
+  */
+ template <bool RowMajor, typename DType>
+ inline int getrf_nothrow(const int M, const int N, DType* A, const int lda, int* ipiv) {
+   const int MN = std::min(M, N);
+   int ierr = 0;
+
+   // Symbols used by ATLAS in the several versions of this function:
+   //   Row      Col       Us
+   //   Nup      Nleft     N_ul
+   //   Ndown    Nright    N_dr
+   // We're going to use N_ul, N_dr
+
+   DType neg_one = -1, one = 1;
+
+   if (MN > 1) {
+     int N_ul = MN >> 1;
+
+     // FIXME: Figure out how ATLAS #defines NB
+     #ifdef NB
+       if (N_ul > NB) N_ul = ATL_MulByNB(ATL_DivByNB(N_ul));
+     #endif
+
+     int N_dr = M - N_ul;
+
+     int i = RowMajor ? getrf_nothrow<true,DType>(N_ul, N, A, lda, ipiv) : getrf_nothrow<false,DType>(M, N_ul, A, lda, ipiv);
+
+     if (i) if (!ierr) ierr = i;
+
+     DType *Ar, *Ac, *An;
+     if (RowMajor) {
+       Ar = &(A[N_ul * lda]),
+       Ac = &(A[N_ul]);
+       An = &(Ar[N_ul]);
+
+       nm::math::laswp<DType>(N_dr, Ar, lda, 0, N_ul, ipiv, 1);
+
+       nm::math::trsm<DType>(CblasRowMajor, CblasRight, CblasUpper, CblasNoTrans, CblasUnit, N_dr, N_ul, one, A, lda, Ar, lda);
+       nm::math::gemm<DType>(CblasRowMajor, CblasNoTrans, CblasNoTrans, N_dr, N-N_ul, N_ul, &neg_one, Ar, lda, Ac, lda, &one, An, lda);
+
+       i = getrf_nothrow<true,DType>(N_dr, N-N_ul, An, lda, ipiv+N_ul);
+     } else {
+       Ar = NULL;
+       Ac = &(A[N_ul * lda]);
+       An = &(Ac[N_ul]);
+
+       nm::math::laswp<DType>(N_dr, Ac, lda, 0, N_ul, ipiv, 1);
+
+       nm::math::trsm<DType>(CblasColMajor, CblasLeft, CblasLower, CblasNoTrans, CblasUnit, N_ul, N_dr, one, A, lda, Ac, lda);
+       nm::math::gemm<DType>(CblasColMajor, CblasNoTrans, CblasNoTrans, M-N_ul, N_dr, N_ul, &neg_one, An, lda, Ac, lda, &one, An, lda);
+
+       i = getrf_nothrow<false,DType>(M-N_ul, N_dr, An, lda, ipiv+N_ul);
+     }
+
+     if (i) if (!ierr) ierr = N_ul + i;
+
+     for (i = N_ul; i != MN; i++) {
+       ipiv[i] += N_ul;
+     }
+
+     nm::math::laswp<DType>(N_ul, A, lda, N_ul, MN, ipiv, 1); /* apply pivots */
+
+   } else if (MN == 1) { // there's another case for the colmajor version, but i don't know that it's that critical. Calls ATLAS LU2, who knows what that does.
+
+     int i = *ipiv = nm::math::idamax<DType>(N, A, 1); // cblas_iamax(N, A, 1);
+
+     DType tmp = A[i];
+     if (tmp != 0) {
+
+       nm::math::scal<DType>((RowMajor ? N : M), nm::math::numeric_inverse(tmp), A, 1);
+       A[i] = *A;
+       *A = tmp;
+
+     } else ierr = 1;
+
+   }
+   return(ierr);
+ }
+
+
+ /*
+  * From ATLAS 3.8.0:
+  *
+  * Computes one of two LU factorizations based on the setting of the Order
+  * parameter, as follows:
+  * ----------------------------------------------------------------------------
+  * Order == CblasColMajor
+  *   Column-major factorization of form
+  *     A = P * L * U
+  *   where P is a row-permutation matrix, L is lower triangular with unit
+  *   diagonal elements (lower trapazoidal if M > N), and U is upper triangular
+  *   (upper trapazoidal if M < N).
+  *
+  * ----------------------------------------------------------------------------
+  * Order == CblasRowMajor
+  *   Row-major factorization of form
+  *     A = P * L * U
+  *   where P is a column-permutation matrix, L is lower triangular (lower
+  *   trapazoidal if M > N), and U is upper triangular with unit diagonals (upper
+  *   trapazoidal if M < N).
+  *
+  * ============================================================================
+  * Let IERR be the return value of the function:
+  *   If IERR == 0, successful exit.
+  *   If (IERR < 0) the -IERR argument had an illegal value
+  *   If (IERR > 0 && Order == CblasColMajor)
+  *     U(i-1,i-1) is exactly zero. The factorization has been completed,
+  *     but the factor U is exactly singular, and division by zero will
+  *     occur if it is used to solve a system of equations.
+  *   If (IERR > 0 && Order == CblasRowMajor)
+  *     L(i-1,i-1) is exactly zero. The factorization has been completed,
+  *     but the factor L is exactly singular, and division by zero will
+  *     occur if it is used to solve a system of equations.
+  */
+ template <typename DType>
+ inline int getrf(const enum CBLAS_ORDER Order, const int M, const int N, DType* A, int lda, int* ipiv) {
+   if (Order == CblasRowMajor) {
+     if (lda < std::max(1,N)) {
+       rb_raise(rb_eArgError, "GETRF: lda must be >= MAX(N,1): lda=%d N=%d", lda, N);
+       return -6;
+     }
+
+     return getrf_nothrow<true,DType>(M, N, A, lda, ipiv);
+   } else {
+     if (lda < std::max(1,M)) {
+       rb_raise(rb_eArgError, "GETRF: lda must be >= MAX(M,1): lda=%d M=%d", lda, M);
+       return -6;
+     }
+
+     return getrf_nothrow<false,DType>(M, N, A, lda, ipiv);
+     //rb_raise(rb_eNotImpError, "column major getrf not implemented");
+   }
+ }
+
+
+
+ /*
+  * Function signature conversion for calling LAPACK's getrf functions as directly as possible.
+  *
+  * For documentation: http://www.netlib.org/lapack/double/dgetrf.f
+  *
+  * This function should normally go in math.cpp, but we need it to be available to nmatrix.cpp.
+  */
+ template <typename DType>
+ inline int clapack_getrf(const enum CBLAS_ORDER order, const int m, const int n, void* a, const int lda, int* ipiv) {
+   return getrf<DType>(order, m, n, reinterpret_cast<DType*>(a), lda, ipiv);
+ }
+
+
+ } } // end nm::math
+
+ #endif
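
Note: the recursion in getrf_nothrow is the standard blocked LU scheme described in the ATLAS comments: factor one panel recursively, then reduce the rest of the matrix with a triangular solve (trsm) and a matrix-matrix update (gemm) before recursing on the trailing block. Ignoring pivoting, the column-major branch corresponds to the block identity below (notation is illustrative, not from the source); the row-major branch is the analogous split by rows.

A = \begin{pmatrix} A_{11} & A_{12} \\ A_{21} & A_{22} \end{pmatrix}
  = \begin{pmatrix} L_{11} & 0 \\ L_{21} & L_{22} \end{pmatrix}
    \begin{pmatrix} U_{11} & U_{12} \\ 0 & U_{22} \end{pmatrix}

\begin{pmatrix} A_{11} \\ A_{21} \end{pmatrix} = \begin{pmatrix} L_{11} \\ L_{21} \end{pmatrix} U_{11}
  \qquad \text{(first recursive call, on the left panel)}

U_{12} = L_{11}^{-1} A_{12}
  \qquad \text{(trsm: unit lower-triangular solve)}

A_{22} - L_{21} U_{12} = L_{22} U_{22}
  \qquad \text{(gemm update, then the second recursive call on the trailing block)}

Applying the row interchanges from the panel factorization to the remaining columns (laswp) and offsetting the trailing-block pivot indices by N_ul accounts for the remaining bookkeeping visible in the code above.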