nmatrix 0.0.7 → 0.0.8

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 2c1dc8a7e82a68a64da1bbf5f52107e064eb1f62
4
- data.tar.gz: dd24f0e30a500f7a397ffcddb21c8cbb4fbfd3d2
3
+ metadata.gz: 29f34a19d63da6254ccd6557dc500391a6c766d2
4
+ data.tar.gz: e70dcc15d4d154d60dfdc12ffc28eaf05705b37a
5
5
  SHA512:
6
- metadata.gz: 4368ba0104a54a72397beaa607cb58dc2c44763833ced2a532c6fe9626b3405d89c0d75b924950b19f02567530c1979dd9a1e463b1b0d812bc77e5b02812277e
7
- data.tar.gz: c8a9094c70c19d7a45fc08f5237a123e6c694e32be358e9c39fa7d9e30c7f0c2e903674951f677d24d4fc725dee67d23b59669e1354b66cd96d8574de8312659
6
+ metadata.gz: 96ce3ba8f13ac7ee5f75a8e27cc3e878dc7d12aa98691c00082ceb342c86f9123e43a1e6593c87142c10bd119f571ad8ff4fb77f48b555b6dc4d86290de24778
7
+ data.tar.gz: 232b743d7b6272e4abcd15a82298d3179fa2f222dc16d754bc5f898bff98aab1c0d0a28d387527dce14b4e500d4d167e61d5636c0570303ffae2238f1b6dd825
data/History.txt CHANGED
@@ -378,3 +378,14 @@
378
378
  * Fixed conversion from Ruby object to Complex and Rational
379
379
 
380
380
  * Fixed memory leak in slicing
381
+
382
+ === 0.0.8 / 2013-08-23
383
+
384
+ * 2 bug fixes:
385
+
386
+ * Fixed Ubuntu compilation bug caused by math header file
387
+ refactoring
388
+
389
+ * Fixed pry version error which manifests on some systems but not
390
+ others
391
+
data/Manifest.txt CHANGED
@@ -54,7 +54,6 @@ ext/nmatrix/storage/yale.h
54
54
  ext/nmatrix/util/sl_list.cpp
55
55
  ext/nmatrix/util/sl_list.h
56
56
  ext/nmatrix/util/util.h
57
- ext/nmatrix/util/lapack.h
58
57
  ext/nmatrix/math.cpp
59
58
  ext/nmatrix/math/asum.h
60
59
  ext/nmatrix/math/geev.h
@@ -68,7 +67,7 @@ ext/nmatrix/math/getrf.h
68
67
  ext/nmatrix/math/getri.h
69
68
  ext/nmatrix/math/getrs.h
70
69
  ext/nmatrix/math/idamax.h
71
- ext/nmatrix/math/lapack.h
70
+ ext/nmatrix/math/inc.h
72
71
  ext/nmatrix/math/laswp.h
73
72
  ext/nmatrix/math/long_dtype.h
74
73
  ext/nmatrix/math/math.h
data/README.rdoc CHANGED
@@ -37,7 +37,7 @@ If you want to obtain the latest (development) code, you should generally do:
37
37
  bundle install
38
38
  bundle exec rake compile
39
39
  bundle exec rake repackage
40
- gem install pkg/nmatrix-0.0.7.gem
40
+ gem install pkg/nmatrix-0.0.8.gem
41
41
 
42
42
  Detailed instructions are available for {Mac}[https://github.com/SciRuby/nmatrix/wiki/Installation#mac-os-x] and {Linux}[https://github.com/SciRuby/nmatrix/wiki/Installation#linux].
43
43
 
@@ -79,7 +79,7 @@ Read the instructions in +CONTRIBUTING.md+ if you want to help NMatrix.
79
79
 
80
80
  == Features
81
81
 
82
- The following features exist in the current version of NMatrix (0.0.7):
82
+ The following features exist in the current version of NMatrix (0.0.8):
83
83
 
84
84
  * Matrix and vector storage containers: dense, yale, list (more to come)
85
85
  * Data types: byte (uint8), int8, int16, int32, int64, float32, float64, complex64, complex128, rational64, rational128,
@@ -147,15 +147,16 @@ end
147
147
  have_header("clapack.h")
148
148
  have_header("cblas.h")
149
149
 
150
- have_func("clapack_dgetrf", "clapack.h")
150
+ have_func("clapack_dgetrf", ["cblas.h", "clapack.h"])
151
+ have_func("clapack_dgetri", ["cblas.h", "clapack.h"])
151
152
  have_func("dgesvd_", "clapack.h")
152
153
 
154
+ have_func("cblas_dgemm", "cblas.h")
155
+
153
156
 
154
157
  #find_library("cblas", "cblas_dgemm")
155
158
  #find_library("atlas", "ATL_dgemmNN")
156
159
 
157
- have_func("cblas_dgemm", "cblas.h")
158
-
159
160
  # Order matters here: ATLAS has to go after LAPACK: http://mail.scipy.org/pipermail/scipy-user/2007-January/010717.html
160
161
  $libs += " -llapack -lcblas -latlas "
161
162
 
data/ext/nmatrix/math.cpp CHANGED
@@ -112,9 +112,11 @@
112
112
  * Project Includes
113
113
  */
114
114
 
115
+
115
116
  #include <algorithm>
116
117
  #include <limits>
117
118
 
119
+ #include "math/inc.h"
118
120
  #include "data/data.h"
119
121
  #include "math/gesdd.h"
120
122
  #include "math/gesvd.h"
@@ -59,6 +59,7 @@
59
59
  #ifndef GETRI_H
60
60
  #define GETRI_H
61
61
 
62
+
62
63
  namespace nm { namespace math {
63
64
 
64
65
  template <typename DType>
@@ -0,0 +1,41 @@
1
+ /////////////////////////////////////////////////////////////////////
2
+ // = NMatrix
3
+ //
4
+ // A linear algebra library for scientific computation in Ruby.
5
+ // NMatrix is part of SciRuby.
6
+ //
7
+ // NMatrix was originally inspired by and derived from NArray, by
8
+ // Masahiro Tanaka: http://narray.rubyforge.org
9
+ //
10
+ // == Copyright Information
11
+ //
12
+ // SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
13
+ // NMatrix is Copyright (c) 2013, Ruby Science Foundation
14
+ //
15
+ // Please see LICENSE.txt for additional copyright notices.
16
+ //
17
+ // == Contributing
18
+ //
19
+ // By contributing source code to SciRuby, you agree to be bound by
20
+ // our Contributor Agreement:
21
+ //
22
+ // * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement
23
+ //
24
+ // == inc.h
25
+ //
26
+ // Includes needed for LAPACK, CLAPACK, and CBLAS functions.
27
+ //
28
+
29
+ #ifndef INC_H
30
+ # define INC_H
31
+
32
+
33
+ extern "C" { // These need to be in an extern "C" block or you'll get all kinds of undefined symbol errors.
34
+ #include <cblas.h>
35
+
36
+ #ifdef HAVE_CLAPACK_H
37
+ #include <clapack.h>
38
+ #endif
39
+ }
40
+
41
+ #endif // INC_H
@@ -82,7 +82,6 @@ extern "C" { // These need to be in an extern "C" block or you'll get all kinds
82
82
  /*
83
83
  * Project Includes
84
84
  */
85
- #include "lapack.h"
86
85
 
87
86
  /*
88
87
  * Macros
@@ -31,6 +31,7 @@
31
31
  * Standard Includes
32
32
  */
33
33
 
34
+ #include <cblas.h>
34
35
  #ifdef HAVE_CLAPACK_H
35
36
  extern "C" {
36
37
  #include <clapack.h>
@@ -26,6 +26,6 @@ class NMatrix
26
26
  # Note that the format of the VERSION string is needed for NMatrix
27
27
  # native IO. If you change the format, please make sure that native
28
28
  # IO can still understand NMatrix::VERSION.
29
- VERSION = "0.0.7"
29
+ VERSION = "0.0.8"
30
30
  end
31
31
 
data/nmatrix.gemspec CHANGED
@@ -54,7 +54,7 @@ EOF
54
54
  gem.add_development_dependency 'rake', '~>0.9'
55
55
  gem.add_development_dependency 'bundler'
56
56
  gem.add_development_dependency 'rspec', '~>2.9.0'
57
- gem.add_development_dependency 'pry', '~>0.9.9'
57
+ gem.add_development_dependency 'pry'
58
58
  gem.add_development_dependency 'guard-rspec', '~>0.7.0'
59
59
  gem.add_development_dependency 'rake-compiler', '~>0.8.1'
60
60
  end
data/spec/lapack_spec.rb CHANGED
@@ -1,4 +1,3 @@
1
- require 'pry'
2
1
  # = NMatrix
3
2
  #
4
3
  # A linear algebra library for scientific computation in Ruby.
@@ -126,7 +125,7 @@ describe NMatrix::LAPACK do
126
125
  end
127
126
  end
128
127
 
129
- it "exposes gesdd" do
128
+ it "exposes lapack gesdd" do
130
129
  if [:float32, :float64].include? dtype
131
130
  a = NMatrix.new([5,6], %w|8.79 9.93 9.83 5.45 3.16
132
131
  6.11 6.91 5.04 -0.27 7.98
@@ -187,7 +186,7 @@ describe NMatrix::LAPACK do
187
186
  end
188
187
 
189
188
 
190
- it "exposes gesvd" do
189
+ it "exposes lapack gesvd" do
191
190
  # http://software.intel.com/sites/products/documentation/doclib/mkl_sa/11/mkl_lapack_examples/dgesvd_ex.c.htm
192
191
  if [:float32, :float64].include? dtype
193
192
  a = NMatrix.new([5,6], %w|8.79 9.93 9.83 5.45 3.16
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: nmatrix
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.7
4
+ version: 0.0.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - John Woods
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2013-08-22 00:00:00.000000000 Z
13
+ date: 2013-08-23 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: rdoc
@@ -72,16 +72,16 @@ dependencies:
72
72
  name: pry
73
73
  requirement: !ruby/object:Gem::Requirement
74
74
  requirements:
75
- - - ~>
75
+ - - '>='
76
76
  - !ruby/object:Gem::Version
77
- version: 0.9.9
77
+ version: '0'
78
78
  type: :development
79
79
  prerelease: false
80
80
  version_requirements: !ruby/object:Gem::Requirement
81
81
  requirements:
82
- - - ~>
82
+ - - '>='
83
83
  - !ruby/object:Gem::Version
84
- version: 0.9.9
84
+ version: '0'
85
85
  - !ruby/object:Gem::Dependency
86
86
  name: guard-rspec
87
87
  requirement: !ruby/object:Gem::Requirement
@@ -149,7 +149,7 @@ files:
149
149
  - ext/nmatrix/math/getri.h
150
150
  - ext/nmatrix/math/getrs.h
151
151
  - ext/nmatrix/math/idamax.h
152
- - ext/nmatrix/math/lapack.h
152
+ - ext/nmatrix/math/inc.h
153
153
  - ext/nmatrix/math/laswp.h
154
154
  - ext/nmatrix/math/long_dtype.h
155
155
  - ext/nmatrix/math/math.h
@@ -1,879 +0,0 @@
1
- /////////////////////////////////////////////////////////////////////
2
- // = NMatrix
3
- //
4
- // A linear algebra library for scientific computation in Ruby.
5
- // NMatrix is part of SciRuby.
6
- //
7
- // NMatrix was originally inspired by and derived from NArray, by
8
- // Masahiro Tanaka: http://narray.rubyforge.org
9
- //
10
- // == Copyright Information
11
- //
12
- // SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
13
- // NMatrix is Copyright (c) 2013, Ruby Science Foundation
14
- //
15
- // Please see LICENSE.txt for additional copyright notices.
16
- //
17
- // == Contributing
18
- //
19
- // By contributing source code to SciRuby, you agree to be bound by
20
- // our Contributor Agreement:
21
- //
22
- // * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement
23
- //
24
- // == lapack.h
25
- //
26
- // Templated versions of LAPACK functions, in C++.
27
-
28
- #ifndef LAPACK_H
29
- #define LAPACK_H
30
-
31
- #include <cmath> // std::round
32
-
33
- #include "math.h"
34
-
35
- namespace nm { namespace math { namespace lapack {
36
-
37
-
38
-
39
- /* -- LAPACK auxiliary routine (version 3.2) -- */
40
- /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */
41
- /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
42
- /* November 2006 */
43
-
44
- /* .. Scalar Arguments .. */
45
-
46
- /* Purpose */
47
- /* ======= */
48
-
49
- /* This program sets problem and machine dependent parameters */
50
- /* useful for xHSEQR and its subroutines. It is called whenever */
51
- /* ILAENV is called with 12 <= ISPEC <= 16 */
52
-
53
- /* Arguments */
54
- /* ========= */
55
-
56
- /* ISPEC (input) int scalar */
57
- /* ISPEC specifies which tunable parameter IPARMQ should */
58
- /* return. */
59
-
60
- /* ISPEC=12: (INMIN) Matrices of order nmin or less */
61
- /* are sent directly to xLAHQR, the implicit */
62
- /* double shift QR algorithm. NMIN must be */
63
- /* at least 11. */
64
-
65
- /* ISPEC=13: (INWIN) Size of the deflation window. */
66
- /* This is best set greater than or equal to */
67
- /* the number of simultaneous shifts NS. */
68
- /* Larger matrices benefit from larger deflation */
69
- /* windows. */
70
-
71
- /* ISPEC=14: (INIBL) Determines when to stop nibbling and */
72
- /* invest in an (expensive) multi-shift QR sweep. */
73
- /* If the aggressive early deflation subroutine */
74
- /* finds LD converged eigenvalues from an order */
75
- /* NW deflation window and LD.GT.(NW*NIBBLE)/100, */
76
- /* then the next QR sweep is skipped and early */
77
- /* deflation is applied immediately to the */
78
- /* remaining active diagonal block. Setting */
79
- /* IPARMQ(ISPEC=14) = 0 causes TTQRE to skip a */
80
- /* multi-shift QR sweep whenever early deflation */
81
- /* finds a converged eigenvalue. Setting */
82
- /* IPARMQ(ISPEC=14) greater than or equal to 100 */
83
- /* prevents TTQRE from skipping a multi-shift */
84
- /* QR sweep. */
85
-
86
- /* ISPEC=15: (NSHFTS) The number of simultaneous shifts in */
87
- /* a multi-shift QR iteration. */
88
-
89
- /* ISPEC=16: (IACC22) IPARMQ is set to 0, 1 or 2 with the */
90
- /* following meanings. */
91
- /* 0: During the multi-shift QR sweep, */
92
- /* xLAQR5 does not accumulate reflections and */
93
- /* does not use matrix-matrix multiply to */
94
- /* update the far-from-diagonal matrix */
95
- /* entries. */
96
- /* 1: During the multi-shift QR sweep, */
97
- /* xLAQR5 and/or xLAQRaccumulates reflections and uses */
98
- /* matrix-matrix multiply to update the */
99
- /* far-from-diagonal matrix entries. */
100
- /* 2: During the multi-shift QR sweep, */
101
- /* xLAQR5 accumulates reflections and takes */
102
- /* advantage of 2-by-2 block structure during */
103
- /* matrix-matrix multiplies. */
104
- /* (If xTRMM is slower than xGEMM, then */
105
- /* IPARMQ(ISPEC=16)=1 may be more efficient than */
106
- /* IPARMQ(ISPEC=16)=2 despite the greater level of */
107
- /* arithmetic work implied by the latter choice.) */
108
-
109
- /* NAME (input) character string */
110
- /* Name of the calling subroutine */
111
-
112
- /* OPTS (input) character string */
113
- /* This is a concatenation of the string arguments to */
114
- /* TTQRE. */
115
-
116
- /* N (input) int scalar */
117
- /* N is the order of the Hessenberg matrix H. */
118
-
119
- /* ILO (input) INTEGER */
120
- /* IHI (input) INTEGER */
121
- /* It is assumed that H is already upper triangular */
122
- /* in rows and columns 1:ILO-1 and IHI+1:N. */
123
-
124
- /* LWORK (input) int scalar */
125
- /* The amount of workspace available. */
126
-
127
- /* Further Details */
128
- /* =============== */
129
-
130
- /* Little is known about how best to choose these parameters. */
131
- /* It is possible to use different values of the parameters */
132
- /* for each of CHSEQR, DHSEQR, SHSEQR and ZHSEQR. */
133
-
134
- /* It is probably best to choose different parameters for */
135
- /* different matrices and different parameters at different */
136
- /* times during the iteration, but this has not been */
137
- /* implemented --- yet. */
138
-
139
-
140
- /* The best choices of most of the parameters depend */
141
- /* in an ill-understood way on the relative execution */
142
- /* rate of xLAQR3 and xLAQR5 and on the nature of each */
143
- /* particular eigenvalue problem. Experiment may be the */
144
- /* only practical way to determine which choices are most */
145
- /* effective. */
146
-
147
- /* Following is a list of default values supplied by IPARMQ. */
148
- /* These defaults may be adjusted in order to attain better */
149
- /* performance in any particular computational environment. */
150
-
151
- /* IPARMQ(ISPEC=12) The xLAHQR vs xLAQR0 crossover point. */
152
- /* Default: 75. (Must be at least 11.) */
153
-
154
- /* IPARMQ(ISPEC=13) Recommended deflation window size. */
155
- /* This depends on ILO, IHI and NS, the */
156
- /* number of simultaneous shifts returned */
157
- /* by IPARMQ(ISPEC=15). The default for */
158
- /* (IHI-ILO+1).LE.500 is NS. The default */
159
- /* for (IHI-ILO+1).GT.500 is 3*NS/2. */
160
-
161
- /* IPARMQ(ISPEC=14) Nibble crossover point. Default: 14. */
162
-
163
- /* IPARMQ(ISPEC=15) Number of simultaneous shifts, NS, in */
164
- /* a multi-shift QR iteration. */
165
-
166
- /* If IHI-ILO+1 is ... */
167
-
168
- /* greater than ...but less ... the */
169
- /* or equal to ... than default is */
170
-
171
- /* 0 30 NS = 2+ */
172
- /* 30 60 NS = 4+ */
173
- /* 60 150 NS = 10 */
174
- /* 150 590 NS = ** */
175
- /* 590 3000 NS = 64 */
176
- /* 3000 6000 NS = 128 */
177
- /* 6000 infinity NS = 256 */
178
-
179
- /* (+) By default matrices of this order are */
180
- /* passed to the implicit double shift routine */
181
- /* xLAHQR. See IPARMQ(ISPEC=12) above. These */
182
- /* values of NS are used only in case of a rare */
183
- /* xLAHQR failure. */
184
-
185
- /* (**) The asterisks (**) indicate an ad-hoc */
186
- /* function increasing from 10 to 64. */
187
-
188
- /* IPARMQ(ISPEC=16) Select structured matrix multiply. */
189
- /* (See ISPEC=16 above for details.) */
190
- /* Default: 3. */
191
-
192
- /* ================================================================ */
193
- inline int iparmq(int ispec, int ilo, int ihi) {
194
-
195
- const int INMIN = 12, INWIN = 13, INIBL = 14, ISHFTS = 15, IACC22 = 16;
196
- const int NMIN = 75, K22MIN = 14, KACMIN = 14, NIBBLE = 14, KNWSWP = 500;
197
-
198
- int ns = 2, nh = ihi - ilo + 1;
199
-
200
- if (ispec == ISHFTS || ispec == INWIN|| ispec == IACC22) {
201
-
202
- /* ==== Set the number of simultaneous shifts ==== */
203
- if (nh >= 30) ns = 4;
204
- if (nh >= 60) ns = 10;
205
- if (nh >= 150) ns = std::max(10, (int)(nh / std::round(std::log((float) (ihi - ilo + 1)) / log(2.f)))); /* Computing MAX */
206
- if (nh >= 590) ns = 64;
207
- if (nh >= 3000) ns = 128;
208
- if (nh >= 6000) ns = 256;
209
- ns = std::max(2,ns - ns % 2); /* Computing MAX */
210
- }
211
-
212
- if (ispec == INMIN) {
213
- /* ===== Matrices of order smaller than NMIN get sent */
214
- /* . to xLAHQR, the classic double shift algorithm. */
215
- /* . This must be at least 11. ==== */
216
- return NMIN;
217
-
218
- } else if (ispec == INIBL) {
219
-
220
- /* ==== INIBL: skip a multi-shift qr iteration */
221
- /* . whenever aggressive early deflation finds */
222
- /* . at least (NIBBLE*(window size)/100) deflations. ==== */
223
-
224
- return NIBBLE;
225
-
226
- } else if (ispec == ISHFTS) {
227
-
228
- /* ==== NSHFTS: The number of simultaneous shifts ===== */
229
- return ns;
230
-
231
- } else if (ispec == INWIN) {
232
-
233
- /* ==== NW: deflation window size. ==== */
234
-
235
- if (nh <= KNWSWP) return ns;
236
- else return ns * 3 / 2;
237
-
238
- } else if (ispec == 16) {
239
-
240
- /* ==== IACC22: Whether to accumulate reflections */
241
- /* . before updating the far-from-diagonal elements */
242
- /* . and whether to use 2-by-2 block structure while */
243
- /* . doing it. A small amount of work could be saved */
244
- /* . by making this choice dependent also upon the */
245
- /* . NH=IHI-ILO+1. */
246
-
247
- if (ns >= KACMIN) return 1;
248
- if (ns >= K22MIN) return 2;
249
-
250
- }
251
-
252
- return -1;
253
- } /* iparmq_ */
254
-
255
-
256
-
257
-
258
- /* Purpose */
259
- /* ======= */
260
-
261
- /* DGER performs the rank 1 operation */
262
-
263
- /* A := alpha*x*y**T + A, */
264
-
265
- /* where alpha is a scalar, x is an m element vector, y is an n element */
266
- /* vector and A is an m by n matrix. */
267
-
268
- /* Arguments */
269
- /* ========== */
270
-
271
- /* M - INTEGER. */
272
- /* On entry, M specifies the number of rows of the matrix A. */
273
- /* M must be at least zero. */
274
- /* Unchanged on exit. */
275
-
276
- /* N - INTEGER. */
277
- /* On entry, N specifies the number of columns of the matrix A. */
278
- /* N must be at least zero. */
279
- /* Unchanged on exit. */
280
-
281
- /* ALPHA - DOUBLE PRECISION. */
282
- /* On entry, ALPHA specifies the scalar alpha. */
283
- /* Unchanged on exit. */
284
-
285
- /* X - DOUBLE PRECISION array of dimension at least */
286
- /* ( 1 + ( m - 1 )*abs( INCX ) ). */
287
- /* Before entry, the incremented array X must contain the m */
288
- /* element vector x. */
289
- /* Unchanged on exit. */
290
-
291
- /* INCX - INTEGER. */
292
- /* On entry, INCX specifies the increment for the elements of */
293
- /* X. INCX must not be zero. */
294
- /* Unchanged on exit. */
295
-
296
- /* Y - DOUBLE PRECISION array of dimension at least */
297
- /* ( 1 + ( n - 1 )*abs( INCY ) ). */
298
- /* Before entry, the incremented array Y must contain the n */
299
- /* element vector y. */
300
- /* Unchanged on exit. */
301
-
302
- /* INCY - INTEGER. */
303
- /* On entry, INCY specifies the increment for the elements of */
304
- /* Y. INCY must not be zero. */
305
- /* Unchanged on exit. */
306
-
307
- /* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). */
308
- /* Before entry, the leading m by n part of the array A must */
309
- /* contain the matrix of coefficients. On exit, A is */
310
- /* overwritten by the updated matrix. */
311
-
312
- /* LDA - INTEGER. */
313
- /* On entry, LDA specifies the first dimension of A as declared */
314
- /* in the calling (sub) program. LDA must be at least */
315
- /* max( 1, m ). */
316
- /* Unchanged on exit. */
317
-
318
- /* Further Details */
319
- /* =============== */
320
-
321
- /* Level 2 Blas routine. */
322
-
323
- /* -- Written on 22-October-1986. */
324
- /* Jack Dongarra, Argonne National Lab. */
325
- /* Jeremy Du Croz, Nag Central Office. */
326
- /* Sven Hammarling, Nag Central Office. */
327
- /* Richard Hanson, Sandia National Labs. */
328
-
329
- /* ===================================================================== */
330
-
331
-
332
-
333
-
334
- /* Purpose */
335
- /* ======= */
336
-
337
- /* interchanges two vectors. */
338
- /* uses unrolled loops for increments equal one. */
339
-
340
- /* Further Details */
341
- /* =============== */
342
-
343
- /* jack dongarra, linpack, 3/11/78. */
344
- /* modified 12/3/93, array(1) declarations changed to array(*) */
345
-
346
- /* ===================================================================== */
347
- // Formerly dswap
348
-
349
-
350
-
351
-
352
- /* -- LAPACK auxiliary routine (version 3.3.1) -- */
353
- /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */
354
- /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
355
- /* -- April 2011 -- */
356
-
357
- /* .. Scalar Arguments .. */
358
- /* .. */
359
-
360
- /* Purpose */
361
- /* ======= */
362
-
363
- /* IEEECK is called from the ILAENV to verify that Infinity and */
364
- /* possibly NaN arithmetic is safe (i.e. will not trap). */
365
- // FIXME: Can we use std::numeric_limits::traps for this?
366
-
367
- /* Arguments */
368
- /* ========= */
369
-
370
- /* ISPEC (input) INTEGER */
371
- /* Specifies whether to test just for infinity arithmetic */
372
- /* or whether to test for infinity and NaN arithmetic. */
373
- /* = 0: Verify infinity arithmetic only. */
374
- /* = 1: Verify infinity and NaN arithmetic. */
375
-
376
- /* ZERO (input) REAL */
377
- /* Must contain the value 0.0 */
378
- /* This is passed to prevent the compiler from optimizing */
379
- /* away this code. */
380
-
381
- /* ONE (input) REAL */
382
- /* Must contain the value 1.0 */
383
- /* This is passed to prevent the compiler from optimizing */
384
- /* away this code. */
385
-
386
- /* RETURN VALUE: INTEGER */
387
- /* = 0: Arithmetic failed to produce the correct answers */
388
- /* = 1: Arithmetic produced the correct answers */
389
-
390
- /* ===================================================================== */
391
-
392
- /*
393
- * Note from John: This seems totally unnecessary in modern C++.
394
- * FIXME: Remove this after testing that on modern systems this always returns 1.
395
- */
396
-
397
- inline int ieeeck(bool ispec) {
398
-
399
- float posinf = 1.0 / 0.0;
400
- if (posinf <= 1.0) return 0;
401
-
402
- float neginf = -1.0 / 0.0;
403
- if (neginf >= 0.0) return 0;
404
-
405
- float negzro = 1.0 / (neginf + 1.0);
406
- if (negzro != 0.0) return 0;
407
-
408
- neginf = 1.0 / negzro;
409
- if (neginf >= 0.0) return 0;
410
-
411
- float newzro = negzro + 0.0;
412
- if (newzro != 0.0) return 0;
413
-
414
- posinf = 1.0 / newzro;
415
- if (posinf <= 1.0) return 0;
416
-
417
- neginf *= posinf;
418
- if (neginf >= 0.0) return 0;
419
-
420
- posinf *= posinf;
421
- if (posinf <= 1.0) return 0;
422
-
423
-
424
- /* Return if we were only asked to check infinity arithmetic */
425
-
426
- if (!ispec) return 1;
427
-
428
- float nan1 = posinf + neginf;
429
- float nan2 = posinf / neginf;
430
- float nan3 = posinf / posinf;
431
- float nan4 = posinf * 0.0;
432
- float nan5 = neginf * negzro;
433
- float nan6 = nan5 * 0.0;
434
-
435
- if (nan1 == nan1) return 0;
436
- if (nan2 == nan2) return 0;
437
- if (nan3 == nan3) return 0;
438
- if (nan4 == nan4) return 0;
439
- if (nan5 == nan5) return 0;
440
- if (nan6 == nan6) return 0;
441
-
442
- return 1;
443
- } /* ieeeck_ */
444
-
445
-
446
-
447
-
448
- inline int ilaenv_block_size(int n2, int n4, const std::string& c2, const std::string& c3, const std::string& c4, bool sname, bool cname) {
449
- if (c2 == "GE") { //(s_cmp(c2, "GE", (size_t)2, (size_t)2) == 0) {
450
- if (c3 == "TRF") { //if (s_cmp(c3, "TRF", (size_t)3, (size_t)3) == 0) {
451
- if (sname) return 64;
452
- else return 64;
453
- } else if (c3 == "QRF" || c3 == "RQF" || c3 == "LQF" || c3 == "QLF") { //(s_cmp(c3, "QRF", (size_t)3, (size_t)3) == 0 || s_cmp(c3, "RQF", (size_t)3, (size_t)3) == 0 || s_cmp(c3, "LQF", (size_t) 3, (size_t)3) == 0 || s_cmp(c3, "QLF", (size_t)3, (size_t)3) == 0) {
454
- if (sname) return 32;
455
- else return 32;
456
- } else if (c3 == "HRD") {
457
- if (sname) return 32;
458
- else return 32;
459
- } else if (c3 == "BRD") {
460
- if (sname) return 32;
461
- else return 32;
462
- } else if (c3 == "TRI") {
463
- if (sname) return 64;
464
- else return 64;
465
- }
466
- } else if (c2 == "PO") {
467
- if (c3 == "TRF") {
468
- if (sname) return 64;
469
- else return 64;
470
- }
471
- } else if (c2 == "SY") {
472
- if (c3 == "TRF") {
473
- if (sname) return 64;
474
- else return 64;
475
- } else if (sname && c3 == "TRD") {
476
- return 32;
477
- } else if (sname && c3 == "GST") {
478
- return 64;
479
- }
480
- } else if (cname && c2 == "HE") {
481
- if (c3 == "TRF") return 64;
482
- else if (c3 == "TRD") return 32;
483
- else if (c3 == "GST") return 64;
484
- } else if (sname && c2 == "OR") {
485
- if (c3.at(0) == 'G') {
486
- if (c4 == "QR" || c4 == "RQ" || c4 == "LQ" || c4 == "QL" || c4 == "HR" || c4 == "TR" || c4 == "BR") return 32;
487
- } else if (c3.at(0) == 'M') {
488
- if (c4 == "QR" || c4 == "RQ" || c4 == "LQ" || c4 == "QL" || c4 == "HR" || c4 == "TR" || c4 == "BR") return 32;
489
- }
490
- } else if (cname && c2 == "UN") {
491
- if (c3.at(0) == 'G') {
492
- if (c4 == "QR" || c4 == "RQ" || c4 == "LQ" || c4 == "QL" || c4 == "HR" || c4 == "TR" || c4 == "BR") return 32;
493
- } else if (c3.at(0) == 'M') {
494
- if (c4 == "QR" || c4 == "RQ" || c4 == "LQ" || c4 == "QL" || c4 == "HR" || c4 == "TR" || c4 == "BR") return 32;
495
- }
496
- } else if (c2 == "GB") {
497
- if (c3 == "TRF") {
498
- if (sname) {
499
- if (n4 <= 64) return 1;
500
- else return 32;
501
- } else {
502
- if (n4 <= 64) return 1;
503
- else return 32;
504
- }
505
- }
506
- } else if (c2 == "PB") {
507
- if (c3 == "TRF") {
508
- if (sname) {
509
- if (n2 <= 64) return 1;
510
- else return 32;
511
- } else {
512
- if (n2 <= 64) return 1;
513
- else return 32;
514
- }
515
- }
516
- } else if (c2 == "TR") {
517
- if (c3 == "TRI") {
518
- if (sname) return 64;
519
- else return 64;
520
- }
521
- } else if (c2 == "LA") {
522
- if (c3 == "UUM") {
523
- if (sname) return 64;
524
- else return 64;
525
- }
526
- } else if (sname && c2 == "ST") {
527
- if (c3 == "EBZ") return 1;
528
- }
529
- return 1;
530
- }
531
-
532
-
533
- inline int ilaenv_min_block_size(const std::string& c2, const std::string& c3, const std::string& c4, bool sname, bool cname) {
534
-
535
- if (c2 == "GE") {
536
- if (c3 == "QRF" || c3 == "RQF" || c3 == "LQF" || c3 == "QLF") {
537
- if (sname) {
538
- return 2;
539
- } else {
540
- return 2;
541
- }
542
- } else if (c3 == "HRD") {
543
- if (sname) {
544
- return 2;
545
- } else {
546
- return 2;
547
- }
548
- } else if (c3 == "BRD") {
549
- if (sname) {
550
- return 2;
551
- } else {
552
- return 2;
553
- }
554
- } else if (c3 == "TRI") {
555
- if (sname) {
556
- return 2;
557
- } else {
558
- return 2;
559
- }
560
- }
561
- } else if (c2 == "SY") {
562
- if (c3 == "TRF") {
563
- if (sname) {
564
- return 8;
565
- } else {
566
- return 8;
567
- }
568
- } else if (sname && c3 == "TRD") {
569
- return 2;
570
- }
571
- } else if (cname && c2 == "HE") {
572
- if (c3 == "TRD") {
573
- return 2;
574
- }
575
- } else if (sname && c2 == "OR") {
576
- if (c3.at(0) == 'G') {
577
- if (c4 == "QR" || c4 == "RQ" || c4 == "LQ" || c4 == "QL" || c4 == "HR" || c4 == "TR" || c4 == "BR") {
578
- return 2;
579
- }
580
- } else if (c3.at(0) == 'M') {
581
- if (c4 == "QR" || c4 == "RQ" || c4 == "LQ" || c4 == "QL" || c4 == "HR" || c4 == "TR" || c4 == "BR") {
582
- return 2;
583
- }
584
- }
585
- } else if (cname && c2 == "UN") {
586
- if (c3.at(0) == 'G') {
587
- if (c4 == "QR" || c4 == "RQ" || c4 == "LQ" || c4 == "QL" || c4 == "HR" || c4 == "TR" || c4 == "BR") {
588
- return 2;
589
- }
590
- } else if (c3.at(0) == 'M') {
591
- if (c4 == "QR" || c4 == "RQ" || c4 == "LQ" || c4 == "QL" || c4 == "HR" || c4 == "TR" || c4 == "BR") {
592
- return 2;
593
- }
594
- }
595
- }
596
- return 2;
597
- }
598
-
599
-
600
- inline int ilaenv_crossover_point(const std::string& c2, const std::string& c3, const std::string& c4, bool sname, bool cname) {
601
- if (c2 == "GE") {
602
- if (c3 == "QRF" || c3 == "RQF" || c3 == "LQF" || c3 == "QLF") {
603
- if (sname) {
604
- return 128;
605
- } else {
606
- return 128;
607
- }
608
- } else if (c3 == "HRD") {
609
- if (sname) {
610
- return 128;
611
- } else {
612
- return 128;
613
- }
614
- } else if (c3 == "BRD") {
615
- if (sname) {
616
- return 128;
617
- } else {
618
- return 128;
619
- }
620
- }
621
- } else if (c2 == "SY") {
622
- if (sname && c3 == "TRD") {
623
- return 32;
624
- }
625
- } else if (cname && c2 == "HE") {
626
- if (c3 == "TRD") {
627
- return 32;
628
- }
629
- } else if (sname && c2 == "OR") {
630
- if (c3.at(0) == 'G') {
631
- if (c4 == "QR" || c4 == "RQ" || c4 == "LQ" || c4 == "QL" || c4 == "HR" || c4 == "TR" || c4 == "BR") {
632
- return 128;
633
- }
634
- }
635
- } else if (cname && c2 == "UN") {
636
- if (c3.at(0) == 'G') {
637
- if (c4 == "QR" || c4 == "RQ" || c4 == "LQ" || c4 == "QL" || c4 == "HR" || c4 == "TR" || c4 == "BR") {
638
- return 128;
639
- }
640
- }
641
- }
642
- return 0;
643
- }
644
-
645
-
646
- /* -- LAPACK auxiliary routine (version 3.2.1) -- */
647
-
648
- /* -- April 2009 -- */
649
-
650
- /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */
651
- /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
652
-
653
- /* .. Scalar Arguments .. */
654
- /* .. */
655
-
656
- /* Purpose */
657
- /* ======= */
658
-
659
- /* ILAENV is called from the LAPACK routines to choose problem-dependent */
660
- /* parameters for the local environment. See ISPEC for a description of */
661
- /* the parameters. */
662
-
663
- /* ILAENV returns an INTEGER */
664
- /* if ILAENV >= 0: ILAENV returns the value of the parameter specified by ISPEC */
665
- /* if ILAENV < 0: if ILAENV = -k, the k-th argument had an illegal value. */
666
-
667
- /* This version provides a set of parameters which should give good, */
668
- /* but not optimal, performance on many of the currently available */
669
- /* computers. Users are encouraged to modify this subroutine to set */
670
- /* the tuning parameters for their particular machine using the option */
671
- /* and problem size information in the arguments. */
672
-
673
- /* This routine will not function correctly if it is converted to all */
674
- /* lower case. Converting it to all upper case is allowed. */
675
-
676
- /* Arguments */
677
- /* ========= */
678
-
679
- /* ISPEC (input) INTEGER */
680
- /* Specifies the parameter to be returned as the value of */
681
- /* ILAENV. */
682
- /* = 1: the optimal blocksize; if this value is 1, an unblocked */
683
- /* algorithm will give the best performance. */
684
- /* = 2: the minimum block size for which the block routine */
685
- /* should be used; if the usable block size is less than */
686
- /* this value, an unblocked routine should be used. */
687
- /* = 3: the crossover point (in a block routine, for N less */
688
- /* than this value, an unblocked routine should be used) */
689
- /* = 4: the number of shifts, used in the nonsymmetric */
690
- /* eigenvalue routines (DEPRECATED) */
691
- /* = 5: the minimum column dimension for blocking to be used; */
692
- /* rectangular blocks must have dimension at least k by m, */
693
- /* where k is given by ILAENV(2,...) and m by ILAENV(5,...) */
694
- /* = 6: the crossover point for the SVD (when reducing an m by n */
695
- /* matrix to bidiagonal form, if max(m,n)/min(m,n) exceeds */
696
- /* this value, a QR factorization is used first to reduce */
697
- /* the matrix to a triangular form.) */
698
- /* = 7: the number of processors */
699
- /* = 8: the crossover point for the multishift QR method */
700
- /* for nonsymmetric eigenvalue problems (DEPRECATED) */
701
- /* = 9: maximum size of the subproblems at the bottom of the */
702
- /* computation tree in the divide-and-conquer algorithm */
703
- /* (used by xGELSD and xGESDD) */
704
- /* =10: ieee NaN arithmetic can be trusted not to trap */
705
- /* =11: infinity arithmetic can be trusted not to trap */
706
- /* 12 <= ISPEC <= 16: */
707
- /* xHSEQR or one of its subroutines, */
708
- /* see IPARMQ for detailed explanation */
709
-
710
- /* NAME (input) CHARACTER*(*) */
711
- /* The name of the calling subroutine, in either upper case or */
712
- /* lower case. */
713
-
714
- /* OPTS (input) CHARACTER*(*) */
715
- /* The character options to the subroutine NAME, concatenated */
716
- /* into a single character string. For example, UPLO = 'U', */
717
- /* TRANS = 'T', and DIAG = 'N' for a triangular routine would */
718
- /* be specified as OPTS = 'UTN'. */
719
-
720
- /* N1 (input) INTEGER */
721
- /* N2 (input) INTEGER */
722
- /* N3 (input) INTEGER */
723
- /* N4 (input) INTEGER */
724
- /* Problem dimensions for the subroutine NAME; these may not all */
725
- /* be required. */
726
-
727
- /* Further Details */
728
- /* =============== */
729
-
730
- /* The following conventions have been used when calling ILAENV from the */
731
- /* LAPACK routines: */
732
- /* 1) OPTS is a concatenation of all of the character options to */
733
- /* subroutine NAME, in the same order that they appear in the */
734
- /* argument list for NAME, even if they are not used in determining */
735
- /* the value of the parameter specified by ISPEC. */
736
- /* 2) The problem dimensions N1, N2, N3, N4 are specified in the order */
737
- /* that they appear in the argument list for NAME. N1 is used */
738
- /* first, N2 second, and so on, and unused problem dimensions are */
739
- /* passed a value of -1. */
740
- /* 3) The parameter value returned by ILAENV is checked for validity in */
741
- /* the calling subroutine. For example, ILAENV is used to retrieve */
742
- /* the optimal blocksize for STRTRI as follows: */
743
-
744
- /* NB = ILAENV( 1, 'STRTRI', UPLO // DIAG, N, -1, -1, -1 ) */
745
- /* IF( NB.LE.1 ) NB = MAX( 1, N ) */
746
-
747
- /* ===================================================================== */
748
- inline int ilaenv(int ispec, const std::string& name, int n1, int n2, int n3, int n4) {
749
-
750
- if (ispec < 1 || ispec > 3) {
751
- switch (ispec) {
752
- case 4: return 6; /* ISPEC = 4: number of shifts (used by xHSEQR) */
753
- case 5: return 2; /* ISPEC = 5: minimum column dimension (not used) */
754
- case 6: /* ISPEC = 6: crossover point for SVD (used by xGELSS and xGESVD) */
755
- return (int) ((float)std::min(n1, n2) * 1.6f);
756
- case 7: return 1; /* ISPEC = 7: number of processors (not used) */
757
- case 8: return 50; /* ISPEC = 8: crossover point for multishift (used by xHSEQR) */
758
- case 9: return 25; /* ISPEC = 9: maximum size of the subproblems at the bottom of the */
759
- /* computation tree in the divide-and-conquer algorithm */
760
- /* (used by xGELSD and xGESDD) */
761
- case 10: /* ISPEC = 10: ieee NaN arithmetic can be trusted not to trap */
762
- return ieeeck(1);
763
-
764
- case 11: /* ISPEC = 11: infinity arithmetic can be trusted not to trap */
765
- return ieeeck(0);
766
-
767
- default:
768
- if (ispec >= 12 && ispec <= 16) { /* 12 <= ISPEC <= 16: xHSEQR or one of its subroutines. */
769
- return iparmq(ispec, n2, n3);
770
- } else {
771
- return -1; /* Invalid value for ISPEC */
772
- }
773
- }
774
- }
775
-
776
-
777
- /* Convert NAME to upper case if the first character is lower case. */
778
-
779
- std::string subnam(name);
780
- std::transform(subnam.begin(), subnam.end(), subnam.begin(), ::toupper);
781
- std::string c1(subnam);
782
-
783
- bool sname = c1.at(0) == 'S' || c1.at(0) == 'D',
784
- cname = c1.at(0) == 'C' || c1.at(0) == 'Z';
785
-
786
- if (! (cname || sname)) return 1;
787
-
788
- std::string c2(subnam.substr(1, 2)),
789
- c3(subnam.substr(3, 3)),
790
- c4(c3.substr(1, 2));
791
-
792
- if (ispec == 2) return ilaenv_min_block_size(c2, c3, c4, sname, cname);
793
- if (ispec == 3) return ilaenv_crossover_point(c2, c3, c4, sname, cname);
794
- return ilaenv_block_size(n2, n4, c2, c3, c4, sname, cname);
795
-
796
- } /* ilaenv_ */
797
-
798
-
799
-
800
/*
 * LSAME (LAPACK auxiliary routine, version 3.2): case-insensitive comparison
 * of two single characters.
 *
 * ca, cb          point to the characters to compare; only the first byte of
 *                 each is read.
 * ca_len, cb_len  unused; kept so the signature stays call-compatible.
 *
 * Returns 1 if *ca and *cb are the same letter regardless of case, else 0.
 * The template parameter is not used by the body and must be supplied
 * explicitly by the caller.
 */
template <typename DType>
inline int lsame(char const *ca, char const *cb, int ca_len, int cb_len)
{
  (void)ca_len;
  (void)cb_len;

  int inta = static_cast<unsigned char>(*ca);
  int intb = static_cast<unsigned char>(*cb);

  /* Identical bytes need no case folding. */
  if (inta == intb) {
    return 1;
  }

  /* Code of upper-case 'Z' in the execution character set. This must be the
   * character literal: casting the string literal "Z" yields an address. */
  const int zcode = static_cast<unsigned char>('Z');

  if (zcode == 90 || zcode == 122) {
    /* ASCII is assumed - ZCODE is the ASCII code of either lower or */
    /* upper case 'Z'. Fold a..z (97..122) onto A..Z. */
    if (inta >= 97 && inta <= 122) {
      inta -= 32;
    }
    if (intb >= 97 && intb <= 122) {
      intb -= 32;
    }
  } else if (zcode == 233 || zcode == 169) {
    /* EBCDIC is assumed - ZCODE is the EBCDIC code of either lower or */
    /* upper case 'Z'. The lower-case letters occupy three separate ranges. */
    if ((inta >= 129 && inta <= 137) || (inta >= 145 && inta <= 153) || (inta >= 162 && inta <= 169)) {
      inta += 64;
    }
    if ((intb >= 129 && intb <= 137) || (intb >= 145 && intb <= 153) || (intb >= 162 && intb <= 169)) {
      intb += 64;
    }
  } else if (zcode == 218 || zcode == 250) {
    /* ASCII is assumed, on Prime machines - ZCODE is the ASCII code */
    /* plus 128 of either lower or upper case 'Z'. */
    if (inta >= 225 && inta <= 250) {
      inta -= 32;
    }
    if (intb >= 225 && intb <= 250) {
      intb -= 32;
    }
  }

  return inta == intb;
}
868
-
869
-
870
-
871
-
872
- } // end namespace lapack
873
-
874
-
875
-
876
-
877
- }}
878
-
879
- #endif