nmatrix 0.0.7 → 0.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 2c1dc8a7e82a68a64da1bbf5f52107e064eb1f62
4
- data.tar.gz: dd24f0e30a500f7a397ffcddb21c8cbb4fbfd3d2
3
+ metadata.gz: 29f34a19d63da6254ccd6557dc500391a6c766d2
4
+ data.tar.gz: e70dcc15d4d154d60dfdc12ffc28eaf05705b37a
5
5
  SHA512:
6
- metadata.gz: 4368ba0104a54a72397beaa607cb58dc2c44763833ced2a532c6fe9626b3405d89c0d75b924950b19f02567530c1979dd9a1e463b1b0d812bc77e5b02812277e
7
- data.tar.gz: c8a9094c70c19d7a45fc08f5237a123e6c694e32be358e9c39fa7d9e30c7f0c2e903674951f677d24d4fc725dee67d23b59669e1354b66cd96d8574de8312659
6
+ metadata.gz: 96ce3ba8f13ac7ee5f75a8e27cc3e878dc7d12aa98691c00082ceb342c86f9123e43a1e6593c87142c10bd119f571ad8ff4fb77f48b555b6dc4d86290de24778
7
+ data.tar.gz: 232b743d7b6272e4abcd15a82298d3179fa2f222dc16d754bc5f898bff98aab1c0d0a28d387527dce14b4e500d4d167e61d5636c0570303ffae2238f1b6dd825
data/History.txt CHANGED
@@ -378,3 +378,14 @@
378
378
  * Fixed conversion from Ruby object to Complex and Rational
379
379
 
380
380
  * Fixed memory leak in slicing
381
+
382
+ === 0.0.8 / 2013-08-23
383
+
384
+ * 2 bug fixes:
385
+
386
+ * Fixed Ubuntu compilation bug caused by math header file
387
+ refactoring
388
+
389
+ * Fixed pry version error which manifests on some systems but not
390
+ others
391
+
data/Manifest.txt CHANGED
@@ -54,7 +54,6 @@ ext/nmatrix/storage/yale.h
54
54
  ext/nmatrix/util/sl_list.cpp
55
55
  ext/nmatrix/util/sl_list.h
56
56
  ext/nmatrix/util/util.h
57
- ext/nmatrix/util/lapack.h
58
57
  ext/nmatrix/math.cpp
59
58
  ext/nmatrix/math/asum.h
60
59
  ext/nmatrix/math/geev.h
@@ -68,7 +67,7 @@ ext/nmatrix/math/getrf.h
68
67
  ext/nmatrix/math/getri.h
69
68
  ext/nmatrix/math/getrs.h
70
69
  ext/nmatrix/math/idamax.h
71
- ext/nmatrix/math/lapack.h
70
+ ext/nmatrix/math/inc.h
72
71
  ext/nmatrix/math/laswp.h
73
72
  ext/nmatrix/math/long_dtype.h
74
73
  ext/nmatrix/math/math.h
data/README.rdoc CHANGED
@@ -37,7 +37,7 @@ If you want to obtain the latest (development) code, you should generally do:
37
37
  bundle install
38
38
  bundle exec rake compile
39
39
  bundle exec rake repackage
40
- gem install pkg/nmatrix-0.0.7.gem
40
+ gem install pkg/nmatrix-0.0.8.gem
41
41
 
42
42
  Detailed instructions are available for {Mac}[https://github.com/SciRuby/nmatrix/wiki/Installation#mac-os-x] and {Linux}[https://github.com/SciRuby/nmatrix/wiki/Installation#linux].
43
43
 
@@ -79,7 +79,7 @@ Read the instructions in +CONTRIBUTING.md+ if you want to help NMatrix.
79
79
 
80
80
  == Features
81
81
 
82
- The following features exist in the current version of NMatrix (0.0.7):
82
+ The following features exist in the current version of NMatrix (0.0.8):
83
83
 
84
84
  * Matrix and vector storage containers: dense, yale, list (more to come)
85
85
  * Data types: byte (uint8), int8, int16, int32, int64, float32, float64, complex64, complex128, rational64, rational128,
@@ -147,15 +147,16 @@ end
147
147
  have_header("clapack.h")
148
148
  have_header("cblas.h")
149
149
 
150
- have_func("clapack_dgetrf", "clapack.h")
150
+ have_func("clapack_dgetrf", ["cblas.h", "clapack.h"])
151
+ have_func("clapack_dgetri", ["cblas.h", "clapack.h"])
151
152
  have_func("dgesvd_", "clapack.h")
152
153
 
154
+ have_func("cblas_dgemm", "cblas.h")
155
+
153
156
 
154
157
  #find_library("cblas", "cblas_dgemm")
155
158
  #find_library("atlas", "ATL_dgemmNN")
156
159
 
157
- have_func("cblas_dgemm", "cblas.h")
158
-
159
160
  # Order matters here: ATLAS has to go after LAPACK: http://mail.scipy.org/pipermail/scipy-user/2007-January/010717.html
160
161
  $libs += " -llapack -lcblas -latlas "
161
162
 
data/ext/nmatrix/math.cpp CHANGED
@@ -112,9 +112,11 @@
112
112
  * Project Includes
113
113
  */
114
114
 
115
+
115
116
  #include <algorithm>
116
117
  #include <limits>
117
118
 
119
+ #include "math/inc.h"
118
120
  #include "data/data.h"
119
121
  #include "math/gesdd.h"
120
122
  #include "math/gesvd.h"
@@ -59,6 +59,7 @@
59
59
  #ifndef GETRI_H
60
60
  #define GETRI_H
61
61
 
62
+
62
63
  namespace nm { namespace math {
63
64
 
64
65
  template <typename DType>
@@ -0,0 +1,41 @@
1
+ /////////////////////////////////////////////////////////////////////
2
+ // = NMatrix
3
+ //
4
+ // A linear algebra library for scientific computation in Ruby.
5
+ // NMatrix is part of SciRuby.
6
+ //
7
+ // NMatrix was originally inspired by and derived from NArray, by
8
+ // Masahiro Tanaka: http://narray.rubyforge.org
9
+ //
10
+ // == Copyright Information
11
+ //
12
+ // SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
13
+ // NMatrix is Copyright (c) 2013, Ruby Science Foundation
14
+ //
15
+ // Please see LICENSE.txt for additional copyright notices.
16
+ //
17
+ // == Contributing
18
+ //
19
+ // By contributing source code to SciRuby, you agree to be bound by
20
+ // our Contributor Agreement:
21
+ //
22
+ // * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement
23
+ //
24
+ // == inc.h
25
+ //
26
+ // Includes needed for LAPACK, CLAPACK, and CBLAS functions.
27
+ //
28
+
29
+ #ifndef INC_H
30
+ # define INC_H
31
+
32
+
33
+ extern "C" { // These need to be in an extern "C" block or you'll get all kinds of undefined symbol errors.
34
+ #include <cblas.h>
35
+
36
+ #ifdef HAVE_CLAPACK_H
37
+ #include <clapack.h>
38
+ #endif
39
+ }
40
+
41
+ #endif // INC_H
@@ -82,7 +82,6 @@ extern "C" { // These need to be in an extern "C" block or you'll get all kinds
82
82
  /*
83
83
  * Project Includes
84
84
  */
85
- #include "lapack.h"
86
85
 
87
86
  /*
88
87
  * Macros
@@ -31,6 +31,7 @@
31
31
  * Standard Includes
32
32
  */
33
33
 
34
+ #include <cblas.h>
34
35
  #ifdef HAVE_CLAPACK_H
35
36
  extern "C" {
36
37
  #include <clapack.h>
@@ -26,6 +26,6 @@ class NMatrix
26
26
  # Note that the format of the VERSION string is needed for NMatrix
27
27
  # native IO. If you change the format, please make sure that native
28
28
  # IO can still understand NMatrix::VERSION.
29
- VERSION = "0.0.7"
29
+ VERSION = "0.0.8"
30
30
  end
31
31
 
data/nmatrix.gemspec CHANGED
@@ -54,7 +54,7 @@ EOF
54
54
  gem.add_development_dependency 'rake', '~>0.9'
55
55
  gem.add_development_dependency 'bundler'
56
56
  gem.add_development_dependency 'rspec', '~>2.9.0'
57
- gem.add_development_dependency 'pry', '~>0.9.9'
57
+ gem.add_development_dependency 'pry'
58
58
  gem.add_development_dependency 'guard-rspec', '~>0.7.0'
59
59
  gem.add_development_dependency 'rake-compiler', '~>0.8.1'
60
60
  end
data/spec/lapack_spec.rb CHANGED
@@ -1,4 +1,3 @@
1
- require 'pry'
2
1
  # = NMatrix
3
2
  #
4
3
  # A linear algebra library for scientific computation in Ruby.
@@ -126,7 +125,7 @@ describe NMatrix::LAPACK do
126
125
  end
127
126
  end
128
127
 
129
- it "exposes gesdd" do
128
+ it "exposes lapack gesdd" do
130
129
  if [:float32, :float64].include? dtype
131
130
  a = NMatrix.new([5,6], %w|8.79 9.93 9.83 5.45 3.16
132
131
  6.11 6.91 5.04 -0.27 7.98
@@ -187,7 +186,7 @@ describe NMatrix::LAPACK do
187
186
  end
188
187
 
189
188
 
190
- it "exposes gesvd" do
189
+ it "exposes lapack gesvd" do
191
190
  # http://software.intel.com/sites/products/documentation/doclib/mkl_sa/11/mkl_lapack_examples/dgesvd_ex.c.htm
192
191
  if [:float32, :float64].include? dtype
193
192
  a = NMatrix.new([5,6], %w|8.79 9.93 9.83 5.45 3.16
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: nmatrix
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.7
4
+ version: 0.0.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - John Woods
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2013-08-22 00:00:00.000000000 Z
13
+ date: 2013-08-23 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: rdoc
@@ -72,16 +72,16 @@ dependencies:
72
72
  name: pry
73
73
  requirement: !ruby/object:Gem::Requirement
74
74
  requirements:
75
- - - ~>
75
+ - - '>='
76
76
  - !ruby/object:Gem::Version
77
- version: 0.9.9
77
+ version: '0'
78
78
  type: :development
79
79
  prerelease: false
80
80
  version_requirements: !ruby/object:Gem::Requirement
81
81
  requirements:
82
- - - ~>
82
+ - - '>='
83
83
  - !ruby/object:Gem::Version
84
- version: 0.9.9
84
+ version: '0'
85
85
  - !ruby/object:Gem::Dependency
86
86
  name: guard-rspec
87
87
  requirement: !ruby/object:Gem::Requirement
@@ -149,7 +149,7 @@ files:
149
149
  - ext/nmatrix/math/getri.h
150
150
  - ext/nmatrix/math/getrs.h
151
151
  - ext/nmatrix/math/idamax.h
152
- - ext/nmatrix/math/lapack.h
152
+ - ext/nmatrix/math/inc.h
153
153
  - ext/nmatrix/math/laswp.h
154
154
  - ext/nmatrix/math/long_dtype.h
155
155
  - ext/nmatrix/math/math.h
@@ -1,879 +0,0 @@
1
- /////////////////////////////////////////////////////////////////////
2
- // = NMatrix
3
- //
4
- // A linear algebra library for scientific computation in Ruby.
5
- // NMatrix is part of SciRuby.
6
- //
7
- // NMatrix was originally inspired by and derived from NArray, by
8
- // Masahiro Tanaka: http://narray.rubyforge.org
9
- //
10
- // == Copyright Information
11
- //
12
- // SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
13
- // NMatrix is Copyright (c) 2013, Ruby Science Foundation
14
- //
15
- // Please see LICENSE.txt for additional copyright notices.
16
- //
17
- // == Contributing
18
- //
19
- // By contributing source code to SciRuby, you agree to be bound by
20
- // our Contributor Agreement:
21
- //
22
- // * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement
23
- //
24
- // == lapack.h
25
- //
26
- // Templated versions of LAPACK functions, in C++.
27
-
28
- #ifndef LAPACK_H
29
- #define LAPACK_H
30
-
31
- #include <cmath> // std::round
32
-
33
- #include "math.h"
34
-
35
- namespace nm { namespace math { namespace lapack {
36
-
37
-
38
-
39
- /* -- LAPACK auxiliary routine (version 3.2) -- */
40
- /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */
41
- /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
42
- /* November 2006 */
43
-
44
- /* .. Scalar Arguments .. */
45
-
46
- /* Purpose */
47
- /* ======= */
48
-
49
- /* This program sets problem and machine dependent parameters */
50
- /* useful for xHSEQR and its subroutines. It is called whenever */
51
- /* ILAENV is called with 12 <= ISPEC <= 16 */
52
-
53
- /* Arguments */
54
- /* ========= */
55
-
56
- /* ISPEC (input) int scalar */
57
- /* ISPEC specifies which tunable parameter IPARMQ should */
58
- /* return. */
59
-
60
- /* ISPEC=12: (INMIN) Matrices of order nmin or less */
61
- /* are sent directly to xLAHQR, the implicit */
62
- /* double shift QR algorithm. NMIN must be */
63
- /* at least 11. */
64
-
65
- /* ISPEC=13: (INWIN) Size of the deflation window. */
66
- /* This is best set greater than or equal to */
67
- /* the number of simultaneous shifts NS. */
68
- /* Larger matrices benefit from larger deflation */
69
- /* windows. */
70
-
71
- /* ISPEC=14: (INIBL) Determines when to stop nibbling and */
72
- /* invest in an (expensive) multi-shift QR sweep. */
73
- /* If the aggressive early deflation subroutine */
74
- /* finds LD converged eigenvalues from an order */
75
- /* NW deflation window and LD.GT.(NW*NIBBLE)/100, */
76
- /* then the next QR sweep is skipped and early */
77
- /* deflation is applied immediately to the */
78
- /* remaining active diagonal block. Setting */
79
- /* IPARMQ(ISPEC=14) = 0 causes TTQRE to skip a */
80
- /* multi-shift QR sweep whenever early deflation */
81
- /* finds a converged eigenvalue. Setting */
82
- /* IPARMQ(ISPEC=14) greater than or equal to 100 */
83
- /* prevents TTQRE from skipping a multi-shift */
84
- /* QR sweep. */
85
-
86
- /* ISPEC=15: (NSHFTS) The number of simultaneous shifts in */
87
- /* a multi-shift QR iteration. */
88
-
89
- /* ISPEC=16: (IACC22) IPARMQ is set to 0, 1 or 2 with the */
90
- /* following meanings. */
91
- /* 0: During the multi-shift QR sweep, */
92
- /* xLAQR5 does not accumulate reflections and */
93
- /* does not use matrix-matrix multiply to */
94
- /* update the far-from-diagonal matrix */
95
- /* entries. */
96
- /* 1: During the multi-shift QR sweep, */
97
- /* xLAQR5 and/or xLAQRaccumulates reflections and uses */
98
- /* matrix-matrix multiply to update the */
99
- /* far-from-diagonal matrix entries. */
100
- /* 2: During the multi-shift QR sweep. */
101
- /* xLAQR5 accumulates reflections and takes */
102
- /* advantage of 2-by-2 block structure during */
103
- /* matrix-matrix multiplies. */
104
- /* (If xTRMM is slower than xGEMM, then */
105
- /* IPARMQ(ISPEC=16)=1 may be more efficient than */
106
- /* IPARMQ(ISPEC=16)=2 despite the greater level of */
107
- /* arithmetic work implied by the latter choice.) */
108
-
109
- /* NAME (input) character string */
110
- /* Name of the calling subroutine */
111
-
112
- /* OPTS (input) character string */
113
- /* This is a concatenation of the string arguments to */
114
- /* TTQRE. */
115
-
116
- /* N (input) int scalar */
117
- /* N is the order of the Hessenberg matrix H. */
118
-
119
- /* ILO (input) INTEGER */
120
- /* IHI (input) INTEGER */
121
- /* It is assumed that H is already upper triangular */
122
- /* in rows and columns 1:ILO-1 and IHI+1:N. */
123
-
124
- /* LWORK (input) int scalar */
125
- /* The amount of workspace available. */
126
-
127
- /* Further Details */
128
- /* =============== */
129
-
130
- /* Little is known about how best to choose these parameters. */
131
- /* It is possible to use different values of the parameters */
132
- /* for each of CHSEQR, DHSEQR, SHSEQR and ZHSEQR. */
133
-
134
- /* It is probably best to choose different parameters for */
135
- /* different matrices and different parameters at different */
136
- /* times during the iteration, but this has not been */
137
- /* implemented --- yet. */
138
-
139
-
140
- /* The best choices of most of the parameters depend */
141
- /* in an ill-understood way on the relative execution */
142
- /* rate of xLAQR3 and xLAQR5 and on the nature of each */
143
- /* particular eigenvalue problem. Experiment may be the */
144
- /* only practical way to determine which choices are most */
145
- /* effective. */
146
-
147
- /* Following is a list of default values supplied by IPARMQ. */
148
- /* These defaults may be adjusted in order to attain better */
149
- /* performance in any particular computational environment. */
150
-
151
- /* IPARMQ(ISPEC=12) The xLAHQR vs xLAQR0 crossover point. */
152
- /* Default: 75. (Must be at least 11.) */
153
-
154
- /* IPARMQ(ISPEC=13) Recommended deflation window size. */
155
- /* This depends on ILO, IHI and NS, the */
156
- /* number of simultaneous shifts returned */
157
- /* by IPARMQ(ISPEC=15). The default for */
158
- /* (IHI-ILO+1).LE.500 is NS. The default */
159
- /* for (IHI-ILO+1).GT.500 is 3*NS/2. */
160
-
161
- /* IPARMQ(ISPEC=14) Nibble crossover point. Default: 14. */
162
-
163
- /* IPARMQ(ISPEC=15) Number of simultaneous shifts, NS, in */
164
- /* a multi-shift QR iteration. */
165
-
166
- /* If IHI-ILO+1 is ... */
167
-
168
- /* greater than ...but less ... the */
169
- /* or equal to ... than default is */
170
-
171
- /* 0 30 NS = 2+ */
172
- /* 30 60 NS = 4+ */
173
- /* 60 150 NS = 10 */
174
- /* 150 590 NS = ** */
175
- /* 590 3000 NS = 64 */
176
- /* 3000 6000 NS = 128 */
177
- /* 6000 infinity NS = 256 */
178
-
179
- /* (+) By default matrices of this order are */
180
- /* passed to the implicit double shift routine */
181
- /* xLAHQR. See IPARMQ(ISPEC=12) above. These */
182
- /* values of NS are used only in case of a rare */
183
- /* xLAHQR failure. */
184
-
185
- /* (**) The asterisks (**) indicate an ad-hoc */
186
- /* function increasing from 10 to 64. */
187
-
188
- /* IPARMQ(ISPEC=16) Select structured matrix multiply. */
189
- /* (See ISPEC=16 above for details.) */
190
- /* Default: 3. */
191
-
192
- /* ================================================================ */
193
- inline int iparmq(int ispec, int ilo, int ihi) {
194
-
195
- const int INMIN = 12, INWIN = 13, INIBL = 14, ISHFTS = 15, IACC22 = 16;
196
- const int NMIN = 75, K22MIN = 14, KACMIN = 14, NIBBLE = 14, KNWSWP = 500;
197
-
198
- int ns = 2, nh = ihi - ilo + 1;
199
-
200
- if (ispec == ISHFTS || ispec == INWIN|| ispec == IACC22) {
201
-
202
- /* ==== Set the number of simultaneous shifts ==== */
203
- if (nh >= 30) ns = 4;
204
- if (nh >= 60) ns = 10;
205
- if (nh >= 150) ns = std::max(10, (int)(nh / std::round(std::log((float) (ihi - ilo + 1)) / log(2.f)))); /* Computing MAX */
206
- if (nh >= 590) ns = 64;
207
- if (nh >= 3000) ns = 128;
208
- if (nh >= 6000) ns = 256;
209
- ns = std::max(2,ns - ns % 2); /* Computing MAX */
210
- }
211
-
212
- if (ispec == INMIN) {
213
- /* ===== Matrices of order smaller than NMIN get sent */
214
- /* . to xLAHQR, the classic double shift algorithm. */
215
- /* . This must be at least 11. ==== */
216
- return NMIN;
217
-
218
- } else if (ispec == INIBL) {
219
-
220
- /* ==== INIBL: skip a multi-shift qr iteration and */
221
- /* . whenever aggressive early deflation finds */
222
- /* . at least (NIBBLE*(window size)/100) deflations. ==== */
223
-
224
- return NIBBLE;
225
-
226
- } else if (ispec == ISHFTS) {
227
-
228
- /* ==== NSHFTS: The number of simultaneous shifts ===== */
229
- return ns;
230
-
231
- } else if (ispec == INWIN) {
232
-
233
- /* ==== NW: deflation window size. ==== */
234
-
235
- if (nh <= KNWSWP) return ns;
236
- else return ns * 3 / 2;
237
-
238
- } else if (ispec == 16) {
239
-
240
- /* ==== IACC22: Whether to accumulate reflections */
241
- /* . before updating the far-from-diagonal elements */
242
- /* . and whether to use 2-by-2 block structure while */
243
- /* . doing it. A small amount of work could be saved */
244
- /* . by making this choice dependent also upon the */
245
- /* . NH=IHI-ILO+1. */
246
-
247
- if (ns >= KACMIN) return 1;
248
- if (ns >= K22MIN) return 2;
249
-
250
- }
251
-
252
- return -1;
253
- } /* iparmq_ */
254
-
255
-
256
-
257
-
258
- /* Purpose */
259
- /* ======= */
260
-
261
- /* DGER performs the rank 1 operation */
262
-
263
- /* A := alpha*x*y**T + A, */
264
-
265
- /* where alpha is a scalar, x is an m element vector, y is an n element */
266
- /* vector and A is an m by n matrix. */
267
-
268
- /* Arguments */
269
- /* ========== */
270
-
271
- /* M - INTEGER. */
272
- /* On entry, M specifies the number of rows of the matrix A. */
273
- /* M must be at least zero. */
274
- /* Unchanged on exit. */
275
-
276
- /* N - INTEGER. */
277
- /* On entry, N specifies the number of columns of the matrix A. */
278
- /* N must be at least zero. */
279
- /* Unchanged on exit. */
280
-
281
- /* ALPHA - DOUBLE PRECISION. */
282
- /* On entry, ALPHA specifies the scalar alpha. */
283
- /* Unchanged on exit. */
284
-
285
- /* X - DOUBLE PRECISION array of dimension at least */
286
- /* ( 1 + ( m - 1 )*abs( INCX ) ). */
287
- /* Before entry, the incremented array X must contain the m */
288
- /* element vector x. */
289
- /* Unchanged on exit. */
290
-
291
- /* INCX - INTEGER. */
292
- /* On entry, INCX specifies the increment for the elements of */
293
- /* X. INCX must not be zero. */
294
- /* Unchanged on exit. */
295
-
296
- /* Y - DOUBLE PRECISION array of dimension at least */
297
- /* ( 1 + ( n - 1 )*abs( INCY ) ). */
298
- /* Before entry, the incremented array Y must contain the n */
299
- /* element vector y. */
300
- /* Unchanged on exit. */
301
-
302
- /* INCY - INTEGER. */
303
- /* On entry, INCY specifies the increment for the elements of */
304
- /* Y. INCY must not be zero. */
305
- /* Unchanged on exit. */
306
-
307
- /* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). */
308
- /* Before entry, the leading m by n part of the array A must */
309
- /* contain the matrix of coefficients. On exit, A is */
310
- /* overwritten by the updated matrix. */
311
-
312
- /* LDA - INTEGER. */
313
- /* On entry, LDA specifies the first dimension of A as declared */
314
- /* in the calling (sub) program. LDA must be at least */
315
- /* max( 1, m ). */
316
- /* Unchanged on exit. */
317
-
318
- /* Further Details */
319
- /* =============== */
320
-
321
- /* Level 2 Blas routine. */
322
-
323
- /* -- Written on 22-October-1986. */
324
- /* Jack Dongarra, Argonne National Lab. */
325
- /* Jeremy Du Croz, Nag Central Office. */
326
- /* Sven Hammarling, Nag Central Office. */
327
- /* Richard Hanson, Sandia National Labs. */
328
-
329
- /* ===================================================================== */
330
-
331
-
332
-
333
-
334
- /* Purpose */
335
- /* ======= */
336
-
337
- /* interchanges two vectors. */
338
- /* uses unrolled loops for increments equal one. */
339
-
340
- /* Further Details */
341
- /* =============== */
342
-
343
- /* jack dongarra, linpack, 3/11/78. */
344
- /* modified 12/3/93, array(1) declarations changed to array(*) */
345
-
346
- /* ===================================================================== */
347
- // Formerly dswap
348
-
349
-
350
-
351
-
352
- /* -- LAPACK auxiliary routine (version 3.3.1) -- */
353
- /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */
354
- /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
355
- /* -- April 2011 -- */
356
-
357
- /* .. Scalar Arguments .. */
358
- /* .. */
359
-
360
- /* Purpose */
361
- /* ======= */
362
-
363
- /* IEEECK is called from the ILAENV to verify that Infinity and */
364
- /* possibly NaN arithmetic is safe (i.e. will not trap). */
365
- // FIXME: Can we use std::numeric_limits::traps for this?
366
-
367
- /* Arguments */
368
- /* ========= */
369
-
370
- /* ISPEC (input) INTEGER */
371
- /* Specifies whether to test just for infinity arithmetic */
372
- /* or whether to test for infinity and NaN arithmetic. */
373
- /* = 0: Verify infinity arithmetic only. */
374
- /* = 1: Verify infinity and NaN arithmetic. */
375
-
376
- /* ZERO (input) REAL */
377
- /* Must contain the value 0.0 */
378
- /* This is passed to prevent the compiler from optimizing */
379
- /* away this code. */
380
-
381
- /* ONE (input) REAL */
382
- /* Must contain the value 1.0 */
383
- /* This is passed to prevent the compiler from optimizing */
384
- /* away this code. */
385
-
386
- /* RETURN VALUE: INTEGER */
387
- /* = 0: Arithmetic failed to produce the correct answers */
388
- /* = 1: Arithmetic produced the correct answers */
389
-
390
- /* ===================================================================== */
391
-
392
- /*
393
- * Note from John: This seems totally unnecessary in modern C++.
394
- * FIXME: Remove this after testing that on modern systems this always returns 1.
395
- */
396
-
397
- inline int ieeeck(bool ispec) {
398
-
399
- float posinf = 1.0 / 0.0;
400
- if (posinf <= 1.0) return 0;
401
-
402
- float neginf = -1.0 / 0.0;
403
- if (neginf >= 0.0) return 0;
404
-
405
- float negzro = 1.0 / (neginf + 1.0);
406
- if (negzro != 0.0) return 0;
407
-
408
- neginf = 1.0 / negzro;
409
- if (neginf >= 0.0) return 0;
410
-
411
- float newzro = negzro + 0.0;
412
- if (newzro != 0.0) return 0;
413
-
414
- posinf = 1.0 / newzro;
415
- if (posinf <= 1.0) return 0;
416
-
417
- neginf *= posinf;
418
- if (neginf >= 0.0) return 0;
419
-
420
- posinf *= posinf;
421
- if (posinf <= 1.0) return 0;
422
-
423
-
424
- /* Return if we were only asked to check infinity arithmetic */
425
-
426
- if (!ispec) return 1;
427
-
428
- float nan1 = posinf + neginf;
429
- float nan2 = posinf / neginf;
430
- float nan3 = posinf / posinf;
431
- float nan4 = posinf * 0.0;
432
- float nan5 = neginf * negzro;
433
- float nan6 = nan5 * 0.0;
434
-
435
- if (nan1 == nan1) return 0;
436
- if (nan2 == nan2) return 0;
437
- if (nan3 == nan3) return 0;
438
- if (nan4 == nan4) return 0;
439
- if (nan5 == nan5) return 0;
440
- if (nan6 == nan6) return 0;
441
-
442
- return 1;
443
- } /* ieeeck_ */
444
-
445
-
446
-
447
-
448
- inline int ilaenv_block_size(int n2, int n4, const std::string& c2, const std::string& c3, const std::string& c4, bool sname, bool cname) {
449
- if (c2 == "GE") { //(s_cmp(c2, "GE", (size_t)2, (size_t)2) == 0) {
450
- if (c3 == "TRF") { //if (s_cmp(c3, "TRF", (size_t)3, (size_t)3) == 0) {
451
- if (sname) return 64;
452
- else return 64;
453
- } else if (c3 == "QRF" || c3 == "RQF" || c3 == "LQF" || c3 == "QLF") { //(s_cmp(c3, "QRF", (size_t)3, (size_t)3) == 0 || s_cmp(c3, "RQF", (size_t)3, (size_t)3) == 0 || s_cmp(c3, "LQF", (size_t) 3, (size_t)3) == 0 || s_cmp(c3, "QLF", (size_t)3, (size_t)3) == 0) {
454
- if (sname) return 32;
455
- else return 32;
456
- } else if (c3 == "HRD") {
457
- if (sname) return 32;
458
- else return 32;
459
- } else if (c3 == "BRD") {
460
- if (sname) return 32;
461
- else return 32;
462
- } else if (c3 == "TRI") {
463
- if (sname) return 64;
464
- else return 64;
465
- }
466
- } else if (c2 == "PO") {
467
- if (c3 == "TRF") {
468
- if (sname) return 64;
469
- else return 64;
470
- }
471
- } else if (c2 == "SY") {
472
- if (c3 == "TRF") {
473
- if (sname) return 64;
474
- else return 64;
475
- } else if (sname && c3 == "TRD") {
476
- return 32;
477
- } else if (sname && c3 == "GST") {
478
- return 64;
479
- }
480
- } else if (cname && c2 == "HE") {
481
- if (c3 == "TRF") return 64;
482
- else if (c3 == "TRD") return 32;
483
- else if (c3 == "GST") return 64;
484
- } else if (sname && c2 == "OR") {
485
- if (c3.at(0) == 'G') {
486
- if (c4 == "QR" || c4 == "RQ" || c4 == "LQ" || c4 == "QL" || c4 == "HR" || c4 == "TR" || c4 == "BR") return 32;
487
- } else if (c3.at(0) == 'M') {
488
- if (c4 == "QR" || c4 == "RQ" || c4 == "LQ" || c4 == "QL" || c4 == "HR" || c4 == "TR" || c4 == "BR") return 32;
489
- }
490
- } else if (cname && c2 == "UN") {
491
- if (c3.at(0) == 'G') {
492
- if (c4 == "QR" || c4 == "RQ" || c4 == "LQ" || c4 == "QL" || c4 == "HR" || c4 == "TR" || c4 == "BR") return 32;
493
- } else if (c3.at(0) == 'M') {
494
- if (c4 == "QR" || c4 == "RQ" || c4 == "LQ" || c4 == "QL" || c4 == "HR" || c4 == "TR" || c4 == "BR") return 32;
495
- }
496
- } else if (c2 == "GB") {
497
- if (c3 == "TRF") {
498
- if (sname) {
499
- if (n4 <= 64) return 1;
500
- else return 32;
501
- } else {
502
- if (n4 <= 64) return 1;
503
- else return 32;
504
- }
505
- }
506
- } else if (c2 == "PB") {
507
- if (c3 == "TRF") {
508
- if (sname) {
509
- if (n2 <= 64) return 1;
510
- else return 32;
511
- } else {
512
- if (n2 <= 64) return 1;
513
- else return 32;
514
- }
515
- }
516
- } else if (c2 == "TR") {
517
- if (c3 == "TRI") {
518
- if (sname) return 64;
519
- else return 64;
520
- }
521
- } else if (c2 == "LA") {
522
- if (c3 == "UUM") {
523
- if (sname) return 64;
524
- else return 64;
525
- }
526
- } else if (sname && c2 == "ST") {
527
- if (c3 == "EBZ") return 1;
528
- }
529
- return 1;
530
- }
531
-
532
-
533
- inline int ilaenv_min_block_size(const std::string& c2, const std::string& c3, const std::string& c4, bool sname, bool cname) {
534
-
535
- if (c2 == "GE") {
536
- if (c3 == "QRF" || c3 == "RQF" || c3 == "LQF" || c3 == "QLF") {
537
- if (sname) {
538
- return 2;
539
- } else {
540
- return 2;
541
- }
542
- } else if (c3 == "HRD") {
543
- if (sname) {
544
- return 2;
545
- } else {
546
- return 2;
547
- }
548
- } else if (c3 == "BRD") {
549
- if (sname) {
550
- return 2;
551
- } else {
552
- return 2;
553
- }
554
- } else if (c3 == "TRI") {
555
- if (sname) {
556
- return 2;
557
- } else {
558
- return 2;
559
- }
560
- }
561
- } else if (c2 == "SY") {
562
- if (c3 == "TRF") {
563
- if (sname) {
564
- return 8;
565
- } else {
566
- return 8;
567
- }
568
- } else if (sname && c3 == "TRD") {
569
- return 2;
570
- }
571
- } else if (cname && c2 == "HE") {
572
- if (c3 == "TRD") {
573
- return 2;
574
- }
575
- } else if (sname && c2 == "OR") {
576
- if (c3.at(0) == 'G') {
577
- if (c4 == "QR" || c4 == "RQ" || c4 == "LQ" || c4 == "QL" || c4 == "HR" || c4 == "TR" || c4 == "BR") {
578
- return 2;
579
- }
580
- } else if (c3.at(0) == 'M') {
581
- if (c4 == "QR" || c4 == "RQ" || c4 == "LQ" || c4 == "QL" || c4 == "HR" || c4 == "TR" || c4 == "BR") {
582
- return 2;
583
- }
584
- }
585
- } else if (cname && c2 == "UN") {
586
- if (c3.at(0) == 'G') {
587
- if (c4 == "QR" || c4 == "RQ" || c4 == "LQ" || c4 == "QL" || c4 == "HR" || c4 == "TR" || c4 == "BR") {
588
- return 2;
589
- }
590
- } else if (c3.at(0) == 'M') {
591
- if (c4 == "QR" || c4 == "RQ" || c4 == "LQ" || c4 == "QL" || c4 == "HR" || c4 == "TR" || c4 == "BR") {
592
- return 2;
593
- }
594
- }
595
- }
596
- return 2;
597
- }
598
-
599
-
600
- inline int ilaenv_crossover_point(const std::string& c2, const std::string& c3, const std::string& c4, bool sname, bool cname) {
601
- if (c2 == "GE") {
602
- if (c3 == "QRF" || c3 == "RQF" || c3 == "LQF" || c3 == "QLF") {
603
- if (sname) {
604
- return 128;
605
- } else {
606
- return 128;
607
- }
608
- } else if (c3 == "HRD") {
609
- if (sname) {
610
- return 128;
611
- } else {
612
- return 128;
613
- }
614
- } else if (c3 == "BRD") {
615
- if (sname) {
616
- return 128;
617
- } else {
618
- return 128;
619
- }
620
- }
621
- } else if (c2 == "SY") {
622
- if (sname && c3 == "TRD") {
623
- return 32;
624
- }
625
- } else if (cname && c2 == "HE") {
626
- if (c3 == "TRD") {
627
- return 32;
628
- }
629
- } else if (sname && c2 == "OR") {
630
- if (c3.at(0) == 'G') {
631
- if (c4 == "QR" || c4 == "RQ" || c4 == "LQ" || c4 == "QL" || c4 == "HR" || c4 == "TR" || c4 == "BR") {
632
- return 128;
633
- }
634
- }
635
- } else if (cname && c2 == "UN") {
636
- if (c3.at(0) == 'G') {
637
- if (c4 == "QR" || c4 == "RQ" || c4 == "LQ" || c4 == "QL" || c4 == "HR" || c4 == "TR" || c4 == "BR") {
638
- return 128;
639
- }
640
- }
641
- }
642
- return 0;
643
- }
644
-
645
-
646
- /* -- LAPACK auxiliary routine (version 3.2.1) -- */
647
-
648
- /* -- April 2009 -- */
649
-
650
- /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */
651
- /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
652
-
653
- /* .. Scalar Arguments .. */
654
- /* .. */
655
-
656
- /* Purpose */
657
- /* ======= */
658
-
659
- /* ILAENV is called from the LAPACK routines to choose problem-dependent */
660
- /* parameters for the local environment. See ISPEC for a description of */
661
- /* the parameters. */
662
-
663
- /* ILAENV returns an INTEGER */
664
- /* if ILAENV >= 0: ILAENV returns the value of the parameter specified by ISPEC */
665
- /* if ILAENV < 0: if ILAENV = -k, the k-th argument had an illegal value. */
666
-
667
- /* This version provides a set of parameters which should give good, */
668
- /* but not optimal, performance on many of the currently available */
669
- /* computers. Users are encouraged to modify this subroutine to set */
670
- /* the tuning parameters for their particular machine using the option */
671
- /* and problem size information in the arguments. */
672
-
673
- /* This routine will not function correctly if it is converted to all */
674
- /* lower case. Converting it to all upper case is allowed. */
675
-
676
- /* Arguments */
677
- /* ========= */
678
-
679
- /* ISPEC (input) INTEGER */
680
- /* Specifies the parameter to be returned as the value of */
681
- /* ILAENV. */
682
- /* = 1: the optimal blocksize; if this value is 1, an unblocked */
683
- /* algorithm will give the best performance. */
684
- /* = 2: the minimum block size for which the block routine */
685
- /* should be used; if the usable block size is less than */
686
- /* this value, an unblocked routine should be used. */
687
- /* = 3: the crossover point (in a block routine, for N less */
688
- /* than this value, an unblocked routine should be used) */
689
- /* = 4: the number of shifts, used in the nonsymmetric */
690
- /* eigenvalue routines (DEPRECATED) */
691
- /* = 5: the minimum column dimension for blocking to be used; */
692
- /* rectangular blocks must have dimension at least k by m, */
693
- /* where k is given by ILAENV(2,...) and m by ILAENV(5,...) */
694
- /* = 6: the crossover point for the SVD (when reducing an m by n */
695
- /* matrix to bidiagonal form, if max(m,n)/min(m,n) exceeds */
696
- /* this value, a QR factorization is used first to reduce */
697
- /* the matrix to a triangular form.) */
698
- /* = 7: the number of processors */
699
- /* = 8: the crossover point for the multishift QR method */
700
- /* for nonsymmetric eigenvalue problems (DEPRECATED) */
701
- /* = 9: maximum size of the subproblems at the bottom of the */
702
- /* computation tree in the divide-and-conquer algorithm */
703
- /* (used by xGELSD and xGESDD) */
704
- /* =10: ieee NaN arithmetic can be trusted not to trap */
705
- /* =11: infinity arithmetic can be trusted not to trap */
706
- /* 12 <= ISPEC <= 16: */
707
- /* xHSEQR or one of its subroutines, */
708
- /* see IPARMQ for detailed explanation */
709
-
710
- /* NAME (input) CHARACTER*(*) */
711
- /* The name of the calling subroutine, in either upper case or */
712
- /* lower case. */
713
-
714
- /* OPTS (input) CHARACTER*(*) */
715
- /* The character options to the subroutine NAME, concatenated */
716
- /* into a single character string. For example, UPLO = 'U', */
717
- /* TRANS = 'T', and DIAG = 'N' for a triangular routine would */
718
- /* be specified as OPTS = 'UTN'. */
719
-
720
- /* N1 (input) INTEGER */
721
- /* N2 (input) INTEGER */
722
- /* N3 (input) INTEGER */
723
- /* N4 (input) INTEGER */
724
- /* Problem dimensions for the subroutine NAME; these may not all */
725
- /* be required. */
726
-
727
- /* Further Details */
728
- /* =============== */
729
-
730
- /* The following conventions have been used when calling ILAENV from the */
731
- /* LAPACK routines: */
732
- /* 1) OPTS is a concatenation of all of the character options to */
733
- /* subroutine NAME, in the same order that they appear in the */
734
- /* argument list for NAME, even if they are not used in determining */
735
- /* the value of the parameter specified by ISPEC. */
736
- /* 2) The problem dimensions N1, N2, N3, N4 are specified in the order */
737
- /* that they appear in the argument list for NAME. N1 is used */
738
- /* first, N2 second, and so on, and unused problem dimensions are */
739
- /* passed a value of -1. */
740
- /* 3) The parameter value returned by ILAENV is checked for validity in */
741
- /* the calling subroutine. For example, ILAENV is used to retrieve */
742
- /* the optimal blocksize for STRTRI as follows: */
743
-
744
- /* NB = ILAENV( 1, 'STRTRI', UPLO // DIAG, N, -1, -1, -1 ) */
745
- /* IF( NB.LE.1 ) NB = MAX( 1, N ) */
746
-
747
- /* ===================================================================== */
748
- inline int ilaenv(int ispec, const std::string& name, int n1, int n2, int n3, int n4) {
749
-
750
- if (ispec < 1 || ispec > 3) {
751
- switch (ispec) {
752
- case 4: return 6; /* ISPEC = 4: number of shifts (used by xHSEQR) */
753
- case 5: return 2; /* ISPEC = 5: minimum column dimension (not used) */
754
- case 6: /* ISPEC = 6: crossover point for SVD (used by xGELSS and xGESVD) */
755
- return (int) ((float)std::min(n1, n2) * 1.6f);
756
- case 7: return 1; /* ISPEC = 7: number of processors (not used) */
757
- case 8: return 50; /* ISPEC = 8: crossover point for multishift (used by xHSEQR) */
758
- case 9: return 25; /* ISPEC = 9: maximum size of the subproblems at the bottom of the */
759
- /* computation tree in the divide-and-conquer algorithm */
760
- /* (used by xGELSD and xGESDD) */
761
- case 10: /* ISPEC = 10: ieee NaN arithmetic can be trusted not to trap */
762
- return ieeeck(1);
763
-
764
- case 11: /* ISPEC = 11: infinity arithmetic can be trusted not to trap */
765
- return ieeeck(0);
766
-
767
- default:
768
- if (ispec >= 12 && ispec <= 16) { /* 12 <= ISPEC <= 16: xHSEQR or one of its subroutines. */
769
- return iparmq(ispec, n2, n3);
770
- } else {
771
- return -1; /* Invalid value for ISPEC */
772
- }
773
- }
774
- }
775
-
776
-
777
- /* Convert NAME to upper case if the first character is lower case. */
778
-
779
- std::string subnam(name);
780
- std::transform(subnam.begin(), subnam.end(), subnam.begin(), ::toupper);
781
- std::string c1(subnam);
782
-
783
- bool sname = c1.at(0) == 'S' || c1.at(0) == 'D',
784
- cname = c1.at(0) == 'C' || c1.at(0) == 'Z';
785
-
786
- if (! (cname || sname)) return 1;
787
-
788
- std::string c2(subnam.substr(1, 2)),
789
- c3(subnam.substr(3, 3)),
790
- c4(c3.substr(1, 2));
791
-
792
- if (ispec == 2) return ilaenv_min_block_size(c2, c3, c4, sname, cname);
793
- if (ispec == 3) return ilaenv_crossover_point(c2, c3, c4, sname, cname);
794
- return ilaenv_block_size(n2, n4, c2, c3, c4, sname, cname);
795
-
796
- } /* ilaenv_ */
797
-
798
-
799
-
800
/*
 * LSAME: case-insensitive comparison of two single characters.
 *
 * ca, cb           Pointers to the characters to compare; only the first
 *                  character of each is read.
 * ca_len, cb_len   Not read by this function.
 * DType            Not used in the body; kept so existing lsame<T>(...) calls
 *                  still compile.
 *
 * Returns 1 if *ca and *cb are the same letter regardless of case, else 0.
 *
 * The character set is detected from the code of 'Z'. Earlier code cast the
 * address of the string literal "Z" to int and used `=` instead of `==` in
 * the last branch, so the intended case folding was not reliably selected.
 */
template <typename DType>
inline int lsame(char const *ca, char const *cb, int ca_len, int cb_len)
{
  (void)ca_len;
  (void)cb_len;

  int inta = static_cast<unsigned char>(*ca);
  int intb = static_cast<unsigned char>(*cb);

  /* Identical codes: nothing to fold. */
  if (inta == intb) {
    return 1;
  }

  /* Code of upper case 'Z' in the execution character set. */
  const int zcode = static_cast<unsigned char>('Z');

  if (zcode == 90 || zcode == 122) {
    /* ASCII is assumed - ZCODE is the ASCII code of either lower or */
    /* upper case 'Z'. */
    if (inta >= 97 && inta <= 122) {
      inta -= 32;
    }
    if (intb >= 97 && intb <= 122) {
      intb -= 32;
    }
  } else if (zcode == 233 || zcode == 169) {
    /* EBCDIC is assumed - ZCODE is the EBCDIC code of either lower or */
    /* upper case 'Z'. */
    if ((inta >= 129 && inta <= 137) || (inta >= 145 && inta <= 153) || (inta >= 162 && inta <= 169)) {
      inta += 64;
    }
    if ((intb >= 129 && intb <= 137) || (intb >= 145 && intb <= 153) || (intb >= 162 && intb <= 169)) {
      intb += 64;
    }
  } else if (zcode == 218 || zcode == 250) {
    /* ASCII is assumed, on Prime machines - ZCODE is the ASCII code */
    /* plus 128 of either lower or upper case 'Z'. */
    if (inta >= 225 && inta <= 250) {
      inta -= 32;
    }
    if (intb >= 225 && intb <= 250) {
      intb -= 32;
    }
  }

  return inta == intb;
}
868
-
869
-
870
-
871
-
872
- } // end namespace lapack
873
-
874
-
875
-
876
-
877
- }}
878
-
879
- #endif