nmatrix-lapacke 0.2.1 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/nmatrix/data/data.h +7 -8
- data/ext/nmatrix/data/ruby_object.h +1 -4
- data/ext/nmatrix/math/asum.h +10 -31
- data/ext/nmatrix/math/cblas_templates_core.h +10 -10
- data/ext/nmatrix/math/getrf.h +2 -2
- data/ext/nmatrix/math/imax.h +12 -9
- data/ext/nmatrix/math/laswp.h +3 -3
- data/ext/nmatrix/math/long_dtype.h +16 -3
- data/ext/nmatrix/math/magnitude.h +54 -0
- data/ext/nmatrix/math/nrm2.h +19 -14
- data/ext/nmatrix/math/trsm.h +40 -36
- data/ext/nmatrix/math/util.h +14 -0
- data/ext/nmatrix/nmatrix.h +39 -1
- data/ext/nmatrix/storage/common.h +9 -3
- data/ext/nmatrix/storage/yale/class.h +1 -1
- data/ext/nmatrix_lapacke/extconf.rb +3 -136
- data/ext/nmatrix_lapacke/lapacke.cpp +104 -84
- data/ext/nmatrix_lapacke/lapacke/src/lapacke_cgeqrf.c +77 -0
- data/ext/nmatrix_lapacke/lapacke/src/lapacke_cgeqrf_work.c +89 -0
- data/ext/nmatrix_lapacke/lapacke/src/lapacke_cunmqr.c +88 -0
- data/ext/nmatrix_lapacke/lapacke/src/lapacke_cunmqr_work.c +111 -0
- data/ext/nmatrix_lapacke/lapacke/src/lapacke_dgeqrf.c +75 -0
- data/ext/nmatrix_lapacke/lapacke/src/lapacke_dgeqrf_work.c +87 -0
- data/ext/nmatrix_lapacke/lapacke/src/lapacke_dormqr.c +86 -0
- data/ext/nmatrix_lapacke/lapacke/src/lapacke_dormqr_work.c +109 -0
- data/ext/nmatrix_lapacke/lapacke/src/lapacke_sgeqrf.c +75 -0
- data/ext/nmatrix_lapacke/lapacke/src/lapacke_sgeqrf_work.c +87 -0
- data/ext/nmatrix_lapacke/lapacke/src/lapacke_sormqr.c +86 -0
- data/ext/nmatrix_lapacke/lapacke/src/lapacke_sormqr_work.c +109 -0
- data/ext/nmatrix_lapacke/lapacke/src/lapacke_zgeqrf.c +77 -0
- data/ext/nmatrix_lapacke/lapacke/src/lapacke_zgeqrf_work.c +89 -0
- data/ext/nmatrix_lapacke/lapacke/src/lapacke_zunmqr.c +88 -0
- data/ext/nmatrix_lapacke/lapacke/src/lapacke_zunmqr_work.c +111 -0
- data/ext/nmatrix_lapacke/lapacke/utils/lapacke_c_nancheck.c +51 -0
- data/ext/nmatrix_lapacke/lapacke/utils/lapacke_d_nancheck.c +51 -0
- data/ext/nmatrix_lapacke/lapacke/utils/lapacke_s_nancheck.c +51 -0
- data/ext/nmatrix_lapacke/lapacke/utils/lapacke_z_nancheck.c +51 -0
- data/ext/nmatrix_lapacke/math_lapacke.cpp +149 -17
- data/ext/nmatrix_lapacke/math_lapacke/lapacke_templates.h +76 -0
- data/lib/nmatrix/lapacke.rb +118 -0
- data/spec/00_nmatrix_spec.rb +50 -1
- data/spec/02_slice_spec.rb +21 -21
- data/spec/blas_spec.rb +25 -3
- data/spec/math_spec.rb +233 -5
- data/spec/plugins/lapacke/lapacke_spec.rb +187 -0
- data/spec/shortcuts_spec.rb +145 -5
- data/spec/spec_helper.rb +24 -1
- metadata +38 -8
data/ext/nmatrix/math/trsm.h
CHANGED
@@ -81,10 +81,14 @@ inline void trsm_nothrow(const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo,
|
|
81
81
|
// (row-major) trsm: left upper trans nonunit m=3 n=1 1/1 a 3 b 3
|
82
82
|
|
83
83
|
if (m == 0 || n == 0) return; /* Quick return if possible. */
|
84
|
+
|
85
|
+
// Apply necessary offset
|
86
|
+
a -= 1 + lda;
|
87
|
+
b -= 1 + ldb;
|
84
88
|
|
85
89
|
if (alpha == 0) { // Handle alpha == 0
|
86
|
-
for (int j =
|
87
|
-
for (int i =
|
90
|
+
for (int j = 1; j <= n; ++j) {
|
91
|
+
for (int i = 1; i <= m; ++i) {
|
88
92
|
b[i + j * ldb] = 0;
|
89
93
|
}
|
90
94
|
}
|
@@ -96,37 +100,37 @@ inline void trsm_nothrow(const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo,
|
|
96
100
|
|
97
101
|
/* Form B := alpha*inv( A )*B. */
|
98
102
|
if (uplo == CblasUpper) {
|
99
|
-
for (int j =
|
103
|
+
for (int j = 1; j <= n; ++j) {
|
100
104
|
if (alpha != 1) {
|
101
|
-
for (int i =
|
105
|
+
for (int i = 1; i <= m; ++i) {
|
102
106
|
b[i + j * ldb] = alpha * b[i + j * ldb];
|
103
107
|
}
|
104
108
|
}
|
105
|
-
for (int k = m
|
109
|
+
for (int k = m; k >= 1; --k) {
|
106
110
|
if (b[k + j * ldb] != 0) {
|
107
111
|
if (diag == CblasNonUnit) {
|
108
112
|
b[k + j * ldb] /= a[k + k * lda];
|
109
113
|
}
|
110
114
|
|
111
|
-
for (int i =
|
115
|
+
for (int i = 1; i <= k-1; ++i) {
|
112
116
|
b[i + j * ldb] -= b[k + j * ldb] * a[i + k * lda];
|
113
117
|
}
|
114
118
|
}
|
115
119
|
}
|
116
120
|
}
|
117
121
|
} else {
|
118
|
-
for (int j =
|
122
|
+
for (int j = 1; j <= n; ++j) {
|
119
123
|
if (alpha != 1) {
|
120
|
-
for (int i =
|
124
|
+
for (int i = 1; i <= m; ++i) {
|
121
125
|
b[i + j * ldb] = alpha * b[i + j * ldb];
|
122
126
|
}
|
123
127
|
}
|
124
|
-
for (int k =
|
128
|
+
for (int k = 1; k <= m; ++k) {
|
125
129
|
if (b[k + j * ldb] != 0.) {
|
126
130
|
if (diag == CblasNonUnit) {
|
127
131
|
b[k + j * ldb] /= a[k + k * lda];
|
128
132
|
}
|
129
|
-
for (int i = k+1; i
|
133
|
+
for (int i = k+1; i <= m; ++i) {
|
130
134
|
b[i + j * ldb] -= b[k + j * ldb] * a[i + k * lda];
|
131
135
|
}
|
132
136
|
}
|
@@ -137,10 +141,10 @@ inline void trsm_nothrow(const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo,
|
|
137
141
|
|
138
142
|
/* Form B := alpha*inv( A**T )*B. */
|
139
143
|
if (uplo == CblasUpper) {
|
140
|
-
for (int j =
|
141
|
-
for (int i =
|
144
|
+
for (int j = 1; j <= n; ++j) {
|
145
|
+
for (int i = 1; i <= m; ++i) {
|
142
146
|
DType temp = alpha * b[i + j * ldb];
|
143
|
-
for (int k =
|
147
|
+
for (int k = 1; k <= i-1; ++k) { // limit was i-1. Lots of similar bugs in this code, probably.
|
144
148
|
temp -= a[k + i * lda] * b[k + j * ldb];
|
145
149
|
}
|
146
150
|
if (diag == CblasNonUnit) {
|
@@ -150,10 +154,10 @@ inline void trsm_nothrow(const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo,
|
|
150
154
|
}
|
151
155
|
}
|
152
156
|
} else {
|
153
|
-
for (int j =
|
154
|
-
for (int i = m
|
157
|
+
for (int j = 1; j <= n; ++j) {
|
158
|
+
for (int i = m; i >= 1; --i) {
|
155
159
|
DType temp= alpha * b[i + j * ldb];
|
156
|
-
for (int k = i+1; k
|
160
|
+
for (int k = i+1; k <= m; ++k) {
|
157
161
|
temp -= a[k + i * lda] * b[k + j * ldb];
|
158
162
|
}
|
159
163
|
if (diag == CblasNonUnit) {
|
@@ -171,37 +175,37 @@ inline void trsm_nothrow(const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo,
|
|
171
175
|
/* Form B := alpha*B*inv( A ). */
|
172
176
|
|
173
177
|
if (uplo == CblasUpper) {
|
174
|
-
for (int j =
|
178
|
+
for (int j = 1; j <= n; ++j) {
|
175
179
|
if (alpha != 1) {
|
176
|
-
for (int i =
|
180
|
+
for (int i = 1; i <= m; ++i) {
|
177
181
|
b[i + j * ldb] = alpha * b[i + j * ldb];
|
178
182
|
}
|
179
183
|
}
|
180
|
-
for (int k =
|
184
|
+
for (int k = 1; k <= j-1; ++k) {
|
181
185
|
if (a[k + j * lda] != 0) {
|
182
|
-
for (int i =
|
186
|
+
for (int i = 1; i <= m; ++i) {
|
183
187
|
b[i + j * ldb] -= a[k + j * lda] * b[i + k * ldb];
|
184
188
|
}
|
185
189
|
}
|
186
190
|
}
|
187
191
|
if (diag == CblasNonUnit) {
|
188
192
|
DType temp = 1 / a[j + j * lda];
|
189
|
-
for (int i =
|
193
|
+
for (int i = 1; i <= m; ++i) {
|
190
194
|
b[i + j * ldb] = temp * b[i + j * ldb];
|
191
195
|
}
|
192
196
|
}
|
193
197
|
}
|
194
198
|
} else {
|
195
|
-
for (int j = n
|
199
|
+
for (int j = n; j >= 1; --j) {
|
196
200
|
if (alpha != 1) {
|
197
|
-
for (int i =
|
201
|
+
for (int i = 1; i <= m; ++i) {
|
198
202
|
b[i + j * ldb] = alpha * b[i + j * ldb];
|
199
203
|
}
|
200
204
|
}
|
201
205
|
|
202
|
-
for (int k = j+1; k
|
206
|
+
for (int k = j+1; k <= n; ++k) {
|
203
207
|
if (a[k + j * lda] != 0.) {
|
204
|
-
for (int i =
|
208
|
+
for (int i = 1; i <= m; ++i) {
|
205
209
|
b[i + j * ldb] -= a[k + j * lda] * b[i + k * ldb];
|
206
210
|
}
|
207
211
|
}
|
@@ -209,7 +213,7 @@ inline void trsm_nothrow(const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo,
|
|
209
213
|
if (diag == CblasNonUnit) {
|
210
214
|
DType temp = 1 / a[j + j * lda];
|
211
215
|
|
212
|
-
for (int i =
|
216
|
+
for (int i = 1; i <= m; ++i) {
|
213
217
|
b[i + j * ldb] = temp * b[i + j * ldb];
|
214
218
|
}
|
215
219
|
}
|
@@ -220,45 +224,45 @@ inline void trsm_nothrow(const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo,
|
|
220
224
|
/* Form B := alpha*B*inv( A**T ). */
|
221
225
|
|
222
226
|
if (uplo == CblasUpper) {
|
223
|
-
for (int k = n
|
227
|
+
for (int k = n; k >= 1; --k) {
|
224
228
|
if (diag == CblasNonUnit) {
|
225
229
|
DType temp= 1 / a[k + k * lda];
|
226
|
-
for (int i =
|
230
|
+
for (int i = 1; i <= m; ++i) {
|
227
231
|
b[i + k * ldb] = temp * b[i + k * ldb];
|
228
232
|
}
|
229
233
|
}
|
230
|
-
for (int j =
|
234
|
+
for (int j = 1; j <= k-1; ++j) {
|
231
235
|
if (a[j + k * lda] != 0.) {
|
232
236
|
DType temp= a[j + k * lda];
|
233
|
-
for (int i =
|
237
|
+
for (int i = 1; i <= m; ++i) {
|
234
238
|
b[i + j * ldb] -= temp * b[i + k * ldb];
|
235
239
|
}
|
236
240
|
}
|
237
241
|
}
|
238
242
|
if (alpha != 1) {
|
239
|
-
for (int i =
|
243
|
+
for (int i = 1; i <= m; ++i) {
|
240
244
|
b[i + k * ldb] = alpha * b[i + k * ldb];
|
241
245
|
}
|
242
246
|
}
|
243
247
|
}
|
244
248
|
} else {
|
245
|
-
for (int k =
|
249
|
+
for (int k = 1; k <= n; ++k) {
|
246
250
|
if (diag == CblasNonUnit) {
|
247
251
|
DType temp = 1 / a[k + k * lda];
|
248
|
-
for (int i =
|
252
|
+
for (int i = 1; i <= m; ++i) {
|
249
253
|
b[i + k * ldb] = temp * b[i + k * ldb];
|
250
254
|
}
|
251
255
|
}
|
252
|
-
for (int j = k+1; j
|
256
|
+
for (int j = k+1; j <= n; ++j) {
|
253
257
|
if (a[j + k * lda] != 0.) {
|
254
258
|
DType temp = a[j + k * lda];
|
255
|
-
for (int i =
|
259
|
+
for (int i = 1; i <= m; ++i) {
|
256
260
|
b[i + j * ldb] -= temp * b[i + k * ldb];
|
257
261
|
}
|
258
262
|
}
|
259
263
|
}
|
260
264
|
if (alpha != 1) {
|
261
|
-
for (int i =
|
265
|
+
for (int i = 1; i <= m; ++i) {
|
262
266
|
b[i + k * ldb] = alpha * b[i + k * ldb];
|
263
267
|
}
|
264
268
|
}
|
data/ext/nmatrix/math/util.h
CHANGED
@@ -70,6 +70,20 @@ static inline enum CBLAS_SIDE blas_side_sym(VALUE op) {
|
|
70
70
|
return CblasLeft;
|
71
71
|
}
|
72
72
|
|
73
|
+
/*
|
74
|
+
* Interprets the LAPACK side argument which could be :left or :right
|
75
|
+
*
|
76
|
+
* Related to obtaining Q in QR factorization after calling lapack_geqrf
|
77
|
+
*/
|
78
|
+
|
79
|
+
static inline char lapacke_side_sym(VALUE op) {
|
80
|
+
ID op_id = rb_to_id(op);
|
81
|
+
if (op_id == nm_rb_left) return 'L';
|
82
|
+
if (op_id == nm_rb_right) return 'R';
|
83
|
+
else rb_raise(rb_eArgError, "Expected :left or :right for side argument");
|
84
|
+
return 'L';
|
85
|
+
}
|
86
|
+
|
73
87
|
/*
|
74
88
|
* Interprets cblas argument which could be :upper or :lower
|
75
89
|
*
|
data/ext/nmatrix/nmatrix.h
CHANGED
@@ -33,6 +33,7 @@
|
|
33
33
|
*/
|
34
34
|
|
35
35
|
#include <ruby.h>
|
36
|
+
#include "ruby_constants.h"
|
36
37
|
|
37
38
|
#ifdef __cplusplus
|
38
39
|
#include <cmath>
|
@@ -57,6 +58,28 @@
|
|
57
58
|
#include "nm_memory.h"
|
58
59
|
#endif
|
59
60
|
|
61
|
+
#ifndef RB_BUILTIN_TYPE
|
62
|
+
# define RB_BUILTIN_TYPE(obj) BUILTIN_TYPE(obj)
|
63
|
+
#endif
|
64
|
+
|
65
|
+
#ifndef RB_FLOAT_TYPE_P
|
66
|
+
/* NOTE: assume flonum doesn't exist */
|
67
|
+
# define RB_FLOAT_TYPE_P(obj) ( \
|
68
|
+
(!SPECIAL_CONST_P(obj) && BUILTIN_TYPE(obj) == T_FLOAT))
|
69
|
+
#endif
|
70
|
+
|
71
|
+
#ifndef RB_TYPE_P
|
72
|
+
# define RB_TYPE_P(obj, type) ( \
|
73
|
+
((type) == T_FIXNUM) ? FIXNUM_P(obj) : \
|
74
|
+
((type) == T_TRUE) ? ((obj) == Qtrue) : \
|
75
|
+
((type) == T_FALSE) ? ((obj) == Qfalse) : \
|
76
|
+
((type) == T_NIL) ? ((obj) == Qnil) : \
|
77
|
+
((type) == T_UNDEF) ? ((obj) == Qundef) : \
|
78
|
+
((type) == T_SYMBOL) ? SYMBOL_P(obj) : \
|
79
|
+
((type) == T_FLOAT) ? RB_FLOAT_TYPE_P(obj) : \
|
80
|
+
(!SPECIAL_CONST_P(obj) && BUILTIN_TYPE(obj) == (type)))
|
81
|
+
#endif
|
82
|
+
|
60
83
|
#ifndef FIX_CONST_VALUE_PTR
|
61
84
|
# if defined(__fcc__) || defined(__fcc_version) || \
|
62
85
|
defined(__FCC__) || defined(__FCC_VERSION)
|
@@ -343,11 +366,25 @@ NM_DEF_STRUCT_POST(NM_GC_HOLDER); // };
|
|
343
366
|
|
344
367
|
#define NM_SRC(val) (NM_STORAGE(val)->src)
|
345
368
|
#define NM_DIM(val) (NM_STORAGE(val)->dim)
|
369
|
+
|
370
|
+
// Returns an int corresponding to the data type of the nmatrix. See the dtype_t
|
371
|
+
// enum for a list of possible data types.
|
346
372
|
#define NM_DTYPE(val) (NM_STORAGE(val)->dtype)
|
373
|
+
|
374
|
+
// Returns a number corresponding to the storage type of the nmatrix. See the stype_t
|
375
|
+
// enum for a list of possible storage types.
|
347
376
|
#define NM_STYPE(val) (NM_STRUCT(val)->stype)
|
377
|
+
|
378
|
+
// Get the shape of the ith dimension (int)
|
348
379
|
#define NM_SHAPE(val,i) (NM_STORAGE(val)->shape[(i)])
|
380
|
+
|
381
|
+
// Get the shape of the 0th dimension (int)
|
349
382
|
#define NM_SHAPE0(val) (NM_STORAGE(val)->shape[0])
|
383
|
+
|
384
|
+
// Get the shape of the 1st dimension (int)
|
350
385
|
#define NM_SHAPE1(val) (NM_STORAGE(val)->shape[1])
|
386
|
+
|
387
|
+
// Get the default value assigned to the nmatrix.
|
351
388
|
#define NM_DEFAULT_VAL(val) (NM_STORAGE_LIST(val)->default_val)
|
352
389
|
|
353
390
|
// Number of elements in a dense nmatrix.
|
@@ -366,7 +403,8 @@ NM_DEF_STRUCT_POST(NM_GC_HOLDER); // };
|
|
366
403
|
|
367
404
|
// True when Ruby's File.exist?(fn) returns true; fn is passed to rb_funcall as the single argument.
// Uses "exist?" because File.exists? was deprecated and then removed in Ruby 3.2.
#define RB_FILE_EXISTS(fn) (rb_funcall(rb_const_get(rb_cObject, rb_intern("File")), rb_intern("exist?"), 1, (fn)) == Qtrue)
|
368
405
|
|
369
|
-
#define
|
406
|
+
#define IsNMatrixType(v) (RB_TYPE_P(v, T_DATA) && (RDATA(v)->dfree == (RUBY_DATA_FUNC)nm_delete || RDATA(v)->dfree == (RUBY_DATA_FUNC)nm_delete_ref))
|
407
|
+
// Raises TypeError unless IsNMatrixType(v) holds. Written as if/else so that an `else` placed after the
// macro in caller code cannot bind to the macro's own `if`; the trailing semicolon is kept for existing call sites.
#define CheckNMatrixType(v) if (IsNMatrixType(v)) {} else rb_raise(rb_eTypeError, "expected NMatrix on left-hand side of operation");
|
370
408
|
|
371
409
|
#define NM_IsNMatrix(obj) \
|
372
410
|
(rb_obj_is_kind_of(obj, cNMatrix) == Qtrue)
|
@@ -34,6 +34,7 @@
|
|
34
34
|
|
35
35
|
#include <ruby.h>
|
36
36
|
#include <cmath> // pow().
|
37
|
+
#include <type_traits>
|
37
38
|
|
38
39
|
/*
|
39
40
|
* Project Includes
|
@@ -45,6 +46,11 @@
|
|
45
46
|
* Macros
|
46
47
|
*/
|
47
48
|
|
49
|
+
#define u_int8_t static_assert(false, "Please use uint8_t for cross-platform support and consistency."); uint8_t
|
50
|
+
#define u_int16_t static_assert(false, "Please use uint16_t for cross-platform support and consistency."); uint16_t
|
51
|
+
#define u_int32_t static_assert(false, "Please use uint32_t for cross-platform support and consistency."); uint32_t
|
52
|
+
#define u_int64_t static_assert(false, "Please use uint64_t for cross-platform support and consistency."); uint64_t
|
53
|
+
|
48
54
|
extern "C" {
|
49
55
|
|
50
56
|
/*
|
@@ -152,7 +158,7 @@ namespace nm {
|
|
152
158
|
EWOP_INT_INT_DIV(int16_t, int32_t)
|
153
159
|
EWOP_INT_INT_DIV(int16_t, int64_t)
|
154
160
|
EWOP_INT_INT_DIV(int8_t, int8_t)
|
155
|
-
EWOP_INT_UINT_DIV(int8_t,
|
161
|
+
EWOP_INT_UINT_DIV(int8_t, uint8_t)
|
156
162
|
EWOP_INT_INT_DIV(int8_t, int16_t)
|
157
163
|
EWOP_INT_INT_DIV(int8_t, int32_t)
|
158
164
|
EWOP_INT_INT_DIV(int8_t, int64_t)
|
@@ -162,12 +168,12 @@ namespace nm {
|
|
162
168
|
EWOP_UINT_INT_DIV(uint8_t, int32_t)
|
163
169
|
EWOP_UINT_INT_DIV(uint8_t, int64_t)
|
164
170
|
EWOP_FLOAT_INT_DIV(float, int8_t)
|
165
|
-
EWOP_FLOAT_INT_DIV(float,
|
171
|
+
EWOP_FLOAT_INT_DIV(float, uint8_t)
|
166
172
|
EWOP_FLOAT_INT_DIV(float, int16_t)
|
167
173
|
EWOP_FLOAT_INT_DIV(float, int32_t)
|
168
174
|
EWOP_FLOAT_INT_DIV(float, int64_t)
|
169
175
|
EWOP_FLOAT_INT_DIV(double, int8_t)
|
170
|
-
EWOP_FLOAT_INT_DIV(double,
|
176
|
+
EWOP_FLOAT_INT_DIV(double, uint8_t)
|
171
177
|
EWOP_FLOAT_INT_DIV(double, int16_t)
|
172
178
|
EWOP_FLOAT_INT_DIV(double, int32_t)
|
173
179
|
EWOP_FLOAT_INT_DIV(double, int64_t)
|
@@ -376,7 +376,7 @@ public:
|
|
376
376
|
v = reinterpret_cast<D*>(s->elements);
|
377
377
|
v_size = nm_storage_count_max_elements(s);
|
378
378
|
|
379
|
-
} else if (
|
379
|
+
} else if (RB_TYPE_P(right, T_ARRAY)) {
|
380
380
|
v_size = RARRAY_LEN(right);
|
381
381
|
v = NM_ALLOC_N(D, v_size);
|
382
382
|
if (dtype() == nm::RUBYOBJ) {
|
@@ -25,74 +25,11 @@
|
|
25
25
|
# This file checks for ATLAS and other necessary headers, and
|
26
26
|
# generates a Makefile for compiling NMatrix.
|
27
27
|
|
28
|
-
require "mkmf"
|
29
|
-
|
30
|
-
|
31
|
-
# Function derived from NArray's extconf.rb.
|
32
|
-
def have_type(type, header=nil) #:nodoc:
|
33
|
-
printf "checking for %s... ", type
|
34
|
-
STDOUT.flush
|
35
|
-
|
36
|
-
src = <<"SRC"
|
37
|
-
#include <ruby.h>
|
38
|
-
SRC
|
39
|
-
|
40
|
-
|
41
|
-
src << <<"SRC" unless header.nil?
|
42
|
-
#include <#{header}>
|
43
|
-
SRC
|
44
|
-
|
45
|
-
r = try_link(src + <<"SRC")
|
46
|
-
int main() { return 0; }
|
47
|
-
int t() { #{type} a; return 0; }
|
48
|
-
SRC
|
49
|
-
|
50
|
-
unless r
|
51
|
-
print "no\n"
|
52
|
-
return false
|
53
|
-
end
|
54
|
-
|
55
|
-
$defs.push(format("-DHAVE_%s", type.upcase))
|
56
|
-
|
57
|
-
print "yes\n"
|
58
|
-
|
59
|
-
return true
|
60
|
-
end
|
61
|
-
|
62
|
-
# Function derived from NArray's extconf.rb.
|
63
|
-
def create_conf_h(file) #:nodoc:
|
64
|
-
print "creating #{file}\n"
|
65
|
-
File.open(file, 'w') do |hfile|
|
66
|
-
header_guard = file.upcase.sub(/\s|\./, '_')
|
67
|
-
|
68
|
-
hfile.puts "#ifndef #{header_guard}"
|
69
|
-
hfile.puts "#define #{header_guard}"
|
70
|
-
hfile.puts
|
71
|
-
|
72
|
-
# FIXME: Find a better way to do this:
|
73
|
-
hfile.puts "#define RUBY_2 1" if RUBY_VERSION >= '2.0'
|
74
|
-
|
75
|
-
for line in $defs
|
76
|
-
line =~ /^-D(.*)/
|
77
|
-
hfile.printf "#define %s 1\n", $1
|
78
|
-
end
|
79
|
-
|
80
|
-
hfile.puts
|
81
|
-
hfile.puts "#endif"
|
82
|
-
end
|
83
|
-
end
|
84
|
-
|
85
|
-
if RUBY_VERSION < '1.9'
|
86
|
-
raise(NotImplementedError, "Sorry, you need at least Ruby 1.9!")
|
87
|
-
else
|
88
|
-
#$INSTALLFILES = [['nmatrix.h', '$(archdir)'], ['nmatrix.hpp', '$(archdir)'], ['nmatrix_config.h', '$(archdir)'], ['nm_memory.h', '$(archdir)']]
|
89
|
-
if /cygwin|mingw/ =~ RUBY_PLATFORM
|
90
|
-
#$INSTALLFILES << ['libnmatrix.a', '$(archdir)']
|
91
|
-
end
|
92
|
-
end
|
28
|
+
require "nmatrix/mkmf"
|
93
29
|
|
30
|
+
#$INSTALLFILES = [['nmatrix.h', '$(archdir)'], ['nmatrix.hpp', '$(archdir)'], ['nmatrix_config.h', '$(archdir)'], ['nm_memory.h', '$(archdir)']]
|
94
31
|
if /cygwin|mingw/ =~ RUBY_PLATFORM
|
95
|
-
|
32
|
+
#$INSTALLFILES << ['libnmatrix.a', '$(archdir)']
|
96
33
|
end
|
97
34
|
|
98
35
|
$DEBUG = true
|
@@ -107,56 +44,6 @@ basenames = %w{nmatrix_lapacke math_lapacke lapacke}
|
|
107
44
|
$objs = basenames.map { |b| "#{b}.o" }
|
108
45
|
$srcs = basenames.map { |b| "#{b}.cpp" }
|
109
46
|
|
110
|
-
def find_newer_gplusplus #:nodoc:
|
111
|
-
print "checking for apparent GNU g++ binary with C++0x/C++11 support... "
|
112
|
-
[9,8,7,6,5,4,3].each do |minor|
|
113
|
-
ver = "4.#{minor}"
|
114
|
-
gpp = "g++-#{ver}"
|
115
|
-
result = `which #{gpp}`
|
116
|
-
next if result.empty?
|
117
|
-
CONFIG['CXX'] = gpp
|
118
|
-
puts ver
|
119
|
-
return CONFIG['CXX']
|
120
|
-
end
|
121
|
-
false
|
122
|
-
end
|
123
|
-
|
124
|
-
def gplusplus_version
|
125
|
-
cxxvar = proc { |n| `#{CONFIG['CXX']} -E -dM - </dev/null | grep #{n}`.chomp.split(' ')[2] }
|
126
|
-
major = cxxvar.call('__GNUC__')
|
127
|
-
minor = cxxvar.call('__GNUC_MINOR__')
|
128
|
-
patch = cxxvar.call('__GNUC_PATCHLEVEL__')
|
129
|
-
|
130
|
-
raise("unable to determine g++ version (match to get version was nil)") if major.nil? || minor.nil? || patch.nil?
|
131
|
-
|
132
|
-
"#{major}.#{minor}.#{patch}"
|
133
|
-
end
|
134
|
-
|
135
|
-
|
136
|
-
if CONFIG['CXX'] == 'clang++'
|
137
|
-
$CXX_STANDARD = 'c++11'
|
138
|
-
|
139
|
-
else
|
140
|
-
version = gplusplus_version
|
141
|
-
if version < '4.3.0' && CONFIG['CXX'] == 'g++' # see if we can find a newer G++, unless it's been overridden by user
|
142
|
-
if !find_newer_gplusplus
|
143
|
-
raise("You need a version of g++ which supports -std=c++0x or -std=c++11. If you're on a Mac and using Homebrew, we recommend using mac-brew-gcc.sh to install a more recent g++.")
|
144
|
-
end
|
145
|
-
version = gplusplus_version
|
146
|
-
end
|
147
|
-
|
148
|
-
if version < '4.7.0'
|
149
|
-
$CXX_STANDARD = 'c++0x'
|
150
|
-
else
|
151
|
-
$CXX_STANDARD = 'c++11'
|
152
|
-
end
|
153
|
-
puts "using C++ standard... #{$CXX_STANDARD}"
|
154
|
-
puts "g++ reports version... " + `#{CONFIG['CXX']} --version|head -n 1|cut -f 3 -d " "`
|
155
|
-
end
|
156
|
-
|
157
|
-
# add smmp in to get generic transp; remove smmp2 to eliminate funcptr transp
|
158
|
-
|
159
|
-
|
160
47
|
# For some reason, if we try to look for /usr/lib64/atlas on a Mac OS X Mavericks system, and the directory does not
|
161
48
|
# exist, it will give a linker error -- even if the lib dir is already correctly included with -L. So we need to check
|
162
49
|
# that Dir.exists?(d) for each.
|
@@ -170,32 +57,12 @@ unless have_library("lapack")
|
|
170
57
|
dir_config("lapack", idefaults[:lapack], ldefaults[:lapack])
|
171
58
|
end
|
172
59
|
|
173
|
-
|
174
|
-
|
175
|
-
#have_func("rb_scan_args", "ruby.h")
|
176
|
-
|
177
60
|
# Order matters here: ATLAS has to go after LAPACK: http://mail.scipy.org/pipermail/scipy-user/2007-January/010717.html
|
178
61
|
$libs += " -llapack "
|
179
62
|
#To use the Intel MKL, comment out the line above, and also comment out the bit above with have_library and dir_config for lapack.
|
180
63
|
#Then add something like the line below (for exactly what linker flags to use see https://software.intel.com/en-us/articles/intel-mkl-link-line-advisor ):
|
181
64
|
#$libs += " -L${MKLROOT}/lib/intel64 -lmkl_intel_lp64 -lmkl_core -lmkl_sequential "
|
182
65
|
|
183
|
-
|
184
|
-
# For release, these next two should both be changed to -O3.
|
185
|
-
$CFLAGS += " -O3" #" -O0 -g "
|
186
|
-
#$CFLAGS += " -static -O0 -g "
|
187
|
-
$CXXFLAGS += " -O3 -std=#{$CXX_STANDARD}" #" -O0 -g -std=#{$CXX_STANDARD} " #-fmax-errors=10 -save-temps
|
188
|
-
#$CPPFLAGS += " -static -O0 -g -std=#{$CXX_STANDARD} "
|
189
|
-
|
190
|
-
CONFIG['warnflags'].gsub!('-Wshorten-64-to-32', '') # doesn't work except in Mac-patched gcc (4.2)
|
191
|
-
CONFIG['warnflags'].gsub!('-Wdeclaration-after-statement', '')
|
192
|
-
CONFIG['warnflags'].gsub!('-Wimplicit-function-declaration', '')
|
193
|
-
|
194
|
-
have_func("rb_array_const_ptr", "ruby.h")
|
195
|
-
have_macro("FIX_CONST_VALUE_PTR", "ruby.h")
|
196
|
-
have_macro("RARRAY_CONST_PTR", "ruby.h")
|
197
|
-
have_macro("RARRAY_AREF", "ruby.h")
|
198
|
-
|
199
66
|
create_conf_h("nmatrix_lapacke_config.h")
|
200
67
|
create_makefile("nmatrix_lapacke")
|
201
68
|
|