pnmatrix 1.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/ext/nmatrix/binary_format.txt +53 -0
- data/ext/nmatrix/data/complex.h +388 -0
- data/ext/nmatrix/data/data.cpp +274 -0
- data/ext/nmatrix/data/data.h +651 -0
- data/ext/nmatrix/data/meta.h +64 -0
- data/ext/nmatrix/data/ruby_object.h +386 -0
- data/ext/nmatrix/extconf.rb +70 -0
- data/ext/nmatrix/math/asum.h +99 -0
- data/ext/nmatrix/math/cblas_enums.h +36 -0
- data/ext/nmatrix/math/cblas_templates_core.h +507 -0
- data/ext/nmatrix/math/gemm.h +241 -0
- data/ext/nmatrix/math/gemv.h +178 -0
- data/ext/nmatrix/math/getrf.h +255 -0
- data/ext/nmatrix/math/getrs.h +121 -0
- data/ext/nmatrix/math/imax.h +82 -0
- data/ext/nmatrix/math/laswp.h +165 -0
- data/ext/nmatrix/math/long_dtype.h +62 -0
- data/ext/nmatrix/math/magnitude.h +54 -0
- data/ext/nmatrix/math/math.h +751 -0
- data/ext/nmatrix/math/nrm2.h +165 -0
- data/ext/nmatrix/math/rot.h +117 -0
- data/ext/nmatrix/math/rotg.h +106 -0
- data/ext/nmatrix/math/scal.h +71 -0
- data/ext/nmatrix/math/trsm.h +336 -0
- data/ext/nmatrix/math/util.h +162 -0
- data/ext/nmatrix/math.cpp +1368 -0
- data/ext/nmatrix/nm_memory.h +60 -0
- data/ext/nmatrix/nmatrix.cpp +285 -0
- data/ext/nmatrix/nmatrix.h +476 -0
- data/ext/nmatrix/ruby_constants.cpp +151 -0
- data/ext/nmatrix/ruby_constants.h +106 -0
- data/ext/nmatrix/ruby_nmatrix.c +3130 -0
- data/ext/nmatrix/storage/common.cpp +77 -0
- data/ext/nmatrix/storage/common.h +183 -0
- data/ext/nmatrix/storage/dense/dense.cpp +1096 -0
- data/ext/nmatrix/storage/dense/dense.h +129 -0
- data/ext/nmatrix/storage/list/list.cpp +1628 -0
- data/ext/nmatrix/storage/list/list.h +138 -0
- data/ext/nmatrix/storage/storage.cpp +730 -0
- data/ext/nmatrix/storage/storage.h +99 -0
- data/ext/nmatrix/storage/yale/class.h +1139 -0
- data/ext/nmatrix/storage/yale/iterators/base.h +143 -0
- data/ext/nmatrix/storage/yale/iterators/iterator.h +131 -0
- data/ext/nmatrix/storage/yale/iterators/row.h +450 -0
- data/ext/nmatrix/storage/yale/iterators/row_stored.h +140 -0
- data/ext/nmatrix/storage/yale/iterators/row_stored_nd.h +169 -0
- data/ext/nmatrix/storage/yale/iterators/stored_diagonal.h +124 -0
- data/ext/nmatrix/storage/yale/math/transpose.h +110 -0
- data/ext/nmatrix/storage/yale/yale.cpp +2074 -0
- data/ext/nmatrix/storage/yale/yale.h +203 -0
- data/ext/nmatrix/types.h +55 -0
- data/ext/nmatrix/util/io.cpp +279 -0
- data/ext/nmatrix/util/io.h +115 -0
- data/ext/nmatrix/util/sl_list.cpp +627 -0
- data/ext/nmatrix/util/sl_list.h +144 -0
- data/ext/nmatrix/util/util.h +78 -0
- data/lib/nmatrix/blas.rb +378 -0
- data/lib/nmatrix/cruby/math.rb +744 -0
- data/lib/nmatrix/enumerate.rb +253 -0
- data/lib/nmatrix/homogeneous.rb +241 -0
- data/lib/nmatrix/io/fortran_format.rb +138 -0
- data/lib/nmatrix/io/harwell_boeing.rb +221 -0
- data/lib/nmatrix/io/market.rb +263 -0
- data/lib/nmatrix/io/point_cloud.rb +189 -0
- data/lib/nmatrix/jruby/decomposition.rb +24 -0
- data/lib/nmatrix/jruby/enumerable.rb +13 -0
- data/lib/nmatrix/jruby/error.rb +4 -0
- data/lib/nmatrix/jruby/math.rb +501 -0
- data/lib/nmatrix/jruby/nmatrix_java.rb +840 -0
- data/lib/nmatrix/jruby/operators.rb +283 -0
- data/lib/nmatrix/jruby/slice.rb +264 -0
- data/lib/nmatrix/lapack_core.rb +181 -0
- data/lib/nmatrix/lapack_plugin.rb +44 -0
- data/lib/nmatrix/math.rb +953 -0
- data/lib/nmatrix/mkmf.rb +100 -0
- data/lib/nmatrix/monkeys.rb +137 -0
- data/lib/nmatrix/nmatrix.rb +1172 -0
- data/lib/nmatrix/rspec.rb +75 -0
- data/lib/nmatrix/shortcuts.rb +1163 -0
- data/lib/nmatrix/version.rb +39 -0
- data/lib/nmatrix/yale_functions.rb +118 -0
- data/lib/nmatrix.rb +28 -0
- data/spec/00_nmatrix_spec.rb +892 -0
- data/spec/01_enum_spec.rb +196 -0
- data/spec/02_slice_spec.rb +407 -0
- data/spec/03_nmatrix_monkeys_spec.rb +80 -0
- data/spec/2x2_dense_double.mat +0 -0
- data/spec/4x4_sparse.mat +0 -0
- data/spec/4x5_dense.mat +0 -0
- data/spec/blas_spec.rb +215 -0
- data/spec/elementwise_spec.rb +311 -0
- data/spec/homogeneous_spec.rb +100 -0
- data/spec/io/fortran_format_spec.rb +88 -0
- data/spec/io/harwell_boeing_spec.rb +98 -0
- data/spec/io/test.rua +9 -0
- data/spec/io_spec.rb +159 -0
- data/spec/lapack_core_spec.rb +482 -0
- data/spec/leakcheck.rb +16 -0
- data/spec/math_spec.rb +1363 -0
- data/spec/nmatrix_yale_resize_test_associations.yaml +2802 -0
- data/spec/nmatrix_yale_spec.rb +286 -0
- data/spec/rspec_monkeys.rb +56 -0
- data/spec/rspec_spec.rb +35 -0
- data/spec/shortcuts_spec.rb +474 -0
- data/spec/slice_set_spec.rb +162 -0
- data/spec/spec_helper.rb +172 -0
- data/spec/stat_spec.rb +214 -0
- data/spec/test.pcd +20 -0
- data/spec/utm5940.mtx +83844 -0
- metadata +295 -0
|
@@ -0,0 +1,1096 @@
|
|
|
1
|
+
/////////////////////////////////////////////////////////////////////
|
|
2
|
+
// = NMatrix
|
|
3
|
+
//
|
|
4
|
+
// A linear algebra library for scientific computation in Ruby.
|
|
5
|
+
// NMatrix is part of SciRuby.
|
|
6
|
+
//
|
|
7
|
+
// NMatrix was originally inspired by and derived from NArray, by
|
|
8
|
+
// Masahiro Tanaka: http://narray.rubyforge.org
|
|
9
|
+
//
|
|
10
|
+
// == Copyright Information
|
|
11
|
+
//
|
|
12
|
+
// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
|
|
13
|
+
// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
|
|
14
|
+
//
|
|
15
|
+
// Please see LICENSE.txt for additional copyright notices.
|
|
16
|
+
//
|
|
17
|
+
// == Contributing
|
|
18
|
+
//
|
|
19
|
+
// By contributing source code to SciRuby, you agree to be bound by
|
|
20
|
+
// our Contributor Agreement:
|
|
21
|
+
//
|
|
22
|
+
// * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement
|
|
23
|
+
//
|
|
24
|
+
// == dense.cpp
|
|
25
|
+
//
|
|
26
|
+
// Dense n-dimensional matrix storage.
|
|
27
|
+
|
|
28
|
+
/*
|
|
29
|
+
* Standard Includes
|
|
30
|
+
*/
|
|
31
|
+
|
|
32
|
+
#include <ruby.h>
|
|
33
|
+
|
|
34
|
+
/*
|
|
35
|
+
* Project Includes
|
|
36
|
+
*/
|
|
37
|
+
#include "../../data/data.h"
|
|
38
|
+
#include "../../math/long_dtype.h"
|
|
39
|
+
#include "../../math/gemm.h"
|
|
40
|
+
#include "../../math/gemv.h"
|
|
41
|
+
#include "../../math/math.h"
|
|
42
|
+
#include "../common.h"
|
|
43
|
+
#include "dense.h"
|
|
44
|
+
|
|
45
|
+
/*
|
|
46
|
+
* Macros
|
|
47
|
+
*/
|
|
48
|
+
|
|
49
|
+
/*
|
|
50
|
+
* Global Variables
|
|
51
|
+
*/
|
|
52
|
+
|
|
53
|
+
/*
|
|
54
|
+
* Forward Declarations
|
|
55
|
+
*/
|
|
56
|
+
|
|
57
|
+
namespace nm { namespace dense_storage {
|
|
58
|
+
|
|
59
|
+
template<typename LDType, typename RDType>
|
|
60
|
+
void ref_slice_copy_transposed(const DENSE_STORAGE* rhs, DENSE_STORAGE* lhs);
|
|
61
|
+
|
|
62
|
+
template <typename LDType, typename RDType>
|
|
63
|
+
DENSE_STORAGE* cast_copy(const DENSE_STORAGE* rhs, nm::dtype_t new_dtype);
|
|
64
|
+
|
|
65
|
+
template <typename LDType, typename RDType>
|
|
66
|
+
bool eqeq(const DENSE_STORAGE* left, const DENSE_STORAGE* right);
|
|
67
|
+
|
|
68
|
+
template <typename DType>
|
|
69
|
+
static DENSE_STORAGE* matrix_multiply(const STORAGE_PAIR& casted_storage, size_t* resulting_shape, bool vector);
|
|
70
|
+
|
|
71
|
+
template <typename DType>
|
|
72
|
+
bool is_hermitian(const DENSE_STORAGE* mat, int lda);
|
|
73
|
+
|
|
74
|
+
template <typename DType>
|
|
75
|
+
bool is_symmetric(const DENSE_STORAGE* mat, int lda);
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
/*
|
|
79
|
+
* Recursive slicing for N-dimensional matrix.
|
|
80
|
+
*/
|
|
81
|
+
template <typename LDType, typename RDType>
|
|
82
|
+
static void slice_copy(DENSE_STORAGE *dest, const DENSE_STORAGE *src, size_t* lengths, size_t pdest, size_t psrc, size_t n) {
|
|
83
|
+
if (src->dim - n > 1) {
|
|
84
|
+
for (size_t i = 0; i < lengths[n]; ++i) {
|
|
85
|
+
slice_copy<LDType,RDType>(dest, src, lengths,
|
|
86
|
+
pdest + dest->stride[n]*i,
|
|
87
|
+
psrc + src->stride[n]*i,
|
|
88
|
+
n + 1);
|
|
89
|
+
}
|
|
90
|
+
} else {
|
|
91
|
+
for (size_t p = 0; p < dest->shape[n]; ++p) {
|
|
92
|
+
reinterpret_cast<LDType*>(dest->elements)[p+pdest] = reinterpret_cast<RDType*>(src->elements)[p+psrc];
|
|
93
|
+
}
|
|
94
|
+
/*memcpy((char*)dest->elements + pdest*DTYPE_SIZES[dest->dtype],
|
|
95
|
+
(char*)src->elements + psrc*DTYPE_SIZES[src->dtype],
|
|
96
|
+
dest->shape[n]*DTYPE_SIZES[dest->dtype]); */
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
/*
|
|
102
|
+
* Recursive function, sets multiple values in a matrix from a single source value. Same basic pattern as slice_copy.
|
|
103
|
+
*/
|
|
104
|
+
template <typename D>
|
|
105
|
+
static void slice_set(DENSE_STORAGE* dest, size_t* lengths, size_t pdest, size_t rank, D* const v, size_t v_size, size_t& v_offset) {
|
|
106
|
+
if (dest->dim - rank > 1) {
|
|
107
|
+
for (size_t i = 0; i < lengths[rank]; ++i) {
|
|
108
|
+
slice_set<D>(dest, lengths, pdest + dest->stride[rank] * i, rank + 1, v, v_size, v_offset);
|
|
109
|
+
}
|
|
110
|
+
} else {
|
|
111
|
+
for (size_t p = 0; p < lengths[rank]; ++p, ++v_offset) {
|
|
112
|
+
if (v_offset >= v_size) v_offset %= v_size;
|
|
113
|
+
|
|
114
|
+
D* elem = reinterpret_cast<D*>(dest->elements);
|
|
115
|
+
elem[p + pdest] = v[v_offset];
|
|
116
|
+
}
|
|
117
|
+
}
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
/*
|
|
122
|
+
* Dense storage set/slice-set function, templated version.
|
|
123
|
+
*/
|
|
124
|
+
template <typename D>
|
|
125
|
+
void set(VALUE left, SLICE* slice, VALUE right) {
|
|
126
|
+
NM_CONSERVATIVE(nm_register_value(&left));
|
|
127
|
+
NM_CONSERVATIVE(nm_register_value(&right));
|
|
128
|
+
|
|
129
|
+
DENSE_STORAGE* s = NM_STORAGE_DENSE(left);
|
|
130
|
+
|
|
131
|
+
std::pair<NMATRIX*,bool> nm_and_free =
|
|
132
|
+
interpret_arg_as_dense_nmatrix(right, s->dtype);
|
|
133
|
+
|
|
134
|
+
// Map the data onto D* v.
|
|
135
|
+
D* v;
|
|
136
|
+
size_t v_size = 1;
|
|
137
|
+
|
|
138
|
+
if (nm_and_free.first) {
|
|
139
|
+
DENSE_STORAGE* t = reinterpret_cast<DENSE_STORAGE*>(nm_and_free.first->storage);
|
|
140
|
+
v = reinterpret_cast<D*>(t->elements);
|
|
141
|
+
v_size = nm_storage_count_max_elements(t);
|
|
142
|
+
|
|
143
|
+
} else if (RB_TYPE_P(right, T_ARRAY)) {
|
|
144
|
+
|
|
145
|
+
v_size = RARRAY_LEN(right);
|
|
146
|
+
v = NM_ALLOC_N(D, v_size);
|
|
147
|
+
if (s->dtype == nm::RUBYOBJ)
|
|
148
|
+
nm_register_values(reinterpret_cast<VALUE*>(v), v_size);
|
|
149
|
+
|
|
150
|
+
for (size_t m = 0; m < v_size; ++m) {
|
|
151
|
+
rubyval_to_cval(rb_ary_entry(right, m), s->dtype, &(v[m]));
|
|
152
|
+
}
|
|
153
|
+
|
|
154
|
+
} else {
|
|
155
|
+
v = reinterpret_cast<D*>(rubyobj_to_cval(right, NM_DTYPE(left)));
|
|
156
|
+
if (s->dtype == nm::RUBYOBJ)
|
|
157
|
+
nm_register_values(reinterpret_cast<VALUE*>(v), v_size);
|
|
158
|
+
}
|
|
159
|
+
|
|
160
|
+
if (slice->single) {
|
|
161
|
+
reinterpret_cast<D*>(s->elements)[nm_dense_storage_pos(s, slice->coords)] = *v;
|
|
162
|
+
} else {
|
|
163
|
+
size_t v_offset = 0;
|
|
164
|
+
slice_set(s, slice->lengths, nm_dense_storage_pos(s, slice->coords), 0, v, v_size, v_offset);
|
|
165
|
+
}
|
|
166
|
+
|
|
167
|
+
// Only free v if it was allocated in this function.
|
|
168
|
+
if (nm_and_free.first) {
|
|
169
|
+
if (nm_and_free.second) {
|
|
170
|
+
nm_delete(nm_and_free.first);
|
|
171
|
+
}
|
|
172
|
+
} else {
|
|
173
|
+
if (s->dtype == nm::RUBYOBJ)
|
|
174
|
+
nm_unregister_values(reinterpret_cast<VALUE*>(v), v_size);
|
|
175
|
+
NM_FREE(v);
|
|
176
|
+
}
|
|
177
|
+
NM_CONSERVATIVE(nm_unregister_value(&left));
|
|
178
|
+
NM_CONSERVATIVE(nm_unregister_value(&right));
|
|
179
|
+
|
|
180
|
+
}
|
|
181
|
+
|
|
182
|
+
}} // end of namespace nm::dense_storage
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
extern "C" {
|
|
186
|
+
|
|
187
|
+
static size_t* stride(size_t* shape, size_t dim);
|
|
188
|
+
static void slice_copy(DENSE_STORAGE *dest, const DENSE_STORAGE *src, size_t* lengths, size_t pdest, size_t psrc, size_t n);
|
|
189
|
+
|
|
190
|
+
/*
|
|
191
|
+
* Functions
|
|
192
|
+
*/
|
|
193
|
+
|
|
194
|
+
///////////////
|
|
195
|
+
// Lifecycle //
|
|
196
|
+
///////////////
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
/*
|
|
200
|
+
* This creates a dummy with all the properties of dense storage, but no actual elements allocation.
|
|
201
|
+
*
|
|
202
|
+
* elements will be NULL when this function finishes. You can clean up with nm_dense_storage_delete, which will
|
|
203
|
+
* check for that NULL pointer before freeing elements.
|
|
204
|
+
*/
|
|
205
|
+
static DENSE_STORAGE* nm_dense_storage_create_dummy(nm::dtype_t dtype, size_t* shape, size_t dim) {
|
|
206
|
+
DENSE_STORAGE* s = NM_ALLOC( DENSE_STORAGE );
|
|
207
|
+
|
|
208
|
+
s->dim = dim;
|
|
209
|
+
s->shape = shape;
|
|
210
|
+
s->dtype = dtype;
|
|
211
|
+
|
|
212
|
+
s->offset = NM_ALLOC_N(size_t, dim);
|
|
213
|
+
memset(s->offset, 0, sizeof(size_t)*dim);
|
|
214
|
+
|
|
215
|
+
s->stride = stride(shape, dim);
|
|
216
|
+
s->count = 1;
|
|
217
|
+
s->src = s;
|
|
218
|
+
|
|
219
|
+
s->elements = NULL;
|
|
220
|
+
|
|
221
|
+
return s;
|
|
222
|
+
}
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
/*
|
|
226
|
+
* Note that elements and elements_length are for initial value(s) passed in.
|
|
227
|
+
* If they are the correct length, they will be used directly. If not, they
|
|
228
|
+
* will be concatenated over and over again into a new elements array. If
|
|
229
|
+
* elements is NULL, the new elements array will not be initialized.
|
|
230
|
+
*/
|
|
231
|
+
DENSE_STORAGE* nm_dense_storage_create(nm::dtype_t dtype, size_t* shape, size_t dim, void* elements, size_t elements_length) {
|
|
232
|
+
if (dtype == nm::RUBYOBJ)
|
|
233
|
+
nm_register_values(reinterpret_cast<VALUE*>(elements), elements_length);
|
|
234
|
+
|
|
235
|
+
DENSE_STORAGE* s = nm_dense_storage_create_dummy(dtype, shape, dim);
|
|
236
|
+
size_t count = nm_storage_count_max_elements(s);
|
|
237
|
+
|
|
238
|
+
if (elements_length == count) {
|
|
239
|
+
s->elements = elements;
|
|
240
|
+
|
|
241
|
+
if (dtype == nm::RUBYOBJ)
|
|
242
|
+
nm_unregister_values(reinterpret_cast<VALUE*>(elements), elements_length);
|
|
243
|
+
|
|
244
|
+
} else {
|
|
245
|
+
|
|
246
|
+
s->elements = NM_ALLOC_N(char, DTYPE_SIZES[dtype]*count);
|
|
247
|
+
|
|
248
|
+
if (dtype == nm::RUBYOBJ)
|
|
249
|
+
nm_unregister_values(reinterpret_cast<VALUE*>(elements), elements_length);
|
|
250
|
+
|
|
251
|
+
size_t copy_length = elements_length;
|
|
252
|
+
|
|
253
|
+
if (elements_length > 0) {
|
|
254
|
+
// Repeat elements over and over again until the end of the matrix.
|
|
255
|
+
for (size_t i = 0; i < count; i += elements_length) {
|
|
256
|
+
|
|
257
|
+
if (i + elements_length > count) {
|
|
258
|
+
copy_length = count - i;
|
|
259
|
+
}
|
|
260
|
+
|
|
261
|
+
memcpy((char*)(s->elements)+i*DTYPE_SIZES[dtype], (char*)(elements)+(i % elements_length)*DTYPE_SIZES[dtype], copy_length*DTYPE_SIZES[dtype]);
|
|
262
|
+
}
|
|
263
|
+
|
|
264
|
+
// Get rid of the init_val.
|
|
265
|
+
NM_FREE(elements);
|
|
266
|
+
}
|
|
267
|
+
}
|
|
268
|
+
|
|
269
|
+
return s;
|
|
270
|
+
}
|
|
271
|
+
|
|
272
|
+
|
|
273
|
+
/*
|
|
274
|
+
* Destructor for dense storage. Make sure when you update this you also update nm_dense_storage_delete_dummy.
|
|
275
|
+
*/
|
|
276
|
+
void nm_dense_storage_delete(STORAGE* s) {
|
|
277
|
+
// Sometimes Ruby passes in NULL storage for some reason (probably on copy construction failure).
|
|
278
|
+
if (s) {
|
|
279
|
+
DENSE_STORAGE* storage = (DENSE_STORAGE*)s;
|
|
280
|
+
if(storage->count-- == 1) {
|
|
281
|
+
NM_FREE(storage->shape);
|
|
282
|
+
NM_FREE(storage->offset);
|
|
283
|
+
NM_FREE(storage->stride);
|
|
284
|
+
if (storage->elements != NULL) {// happens with dummy objects
|
|
285
|
+
NM_FREE(storage->elements);
|
|
286
|
+
}
|
|
287
|
+
NM_FREE(storage);
|
|
288
|
+
}
|
|
289
|
+
}
|
|
290
|
+
}
|
|
291
|
+
|
|
292
|
+
/*
|
|
293
|
+
* Destructor for dense storage references (slicing).
|
|
294
|
+
*/
|
|
295
|
+
void nm_dense_storage_delete_ref(STORAGE* s) {
|
|
296
|
+
// Sometimes Ruby passes in NULL storage for some reason (probably on copy construction failure).
|
|
297
|
+
if (s) {
|
|
298
|
+
DENSE_STORAGE* storage = (DENSE_STORAGE*)s;
|
|
299
|
+
nm_dense_storage_delete( reinterpret_cast<STORAGE*>(storage->src) );
|
|
300
|
+
NM_FREE(storage->shape);
|
|
301
|
+
NM_FREE(storage->offset);
|
|
302
|
+
NM_FREE(storage);
|
|
303
|
+
}
|
|
304
|
+
}
|
|
305
|
+
|
|
306
|
+
/*
|
|
307
|
+
* Mark values in a dense matrix for garbage collection. This may not be necessary -- further testing required.
|
|
308
|
+
*/
|
|
309
|
+
void nm_dense_storage_mark(STORAGE* storage_base) {
|
|
310
|
+
|
|
311
|
+
DENSE_STORAGE* storage = (DENSE_STORAGE*)storage_base;
|
|
312
|
+
|
|
313
|
+
if (storage && storage->dtype == nm::RUBYOBJ) {
|
|
314
|
+
VALUE* els = reinterpret_cast<VALUE*>(storage->elements);
|
|
315
|
+
|
|
316
|
+
if (els) {
|
|
317
|
+
rb_gc_mark_locations(els, &(els[nm_storage_count_max_elements(storage)-1]));
|
|
318
|
+
}
|
|
319
|
+
//for (size_t index = nm_storage_count_max_elements(storage); index-- > 0;) {
|
|
320
|
+
// rb_gc_mark(els[index]);
|
|
321
|
+
//}
|
|
322
|
+
}
|
|
323
|
+
}
|
|
324
|
+
|
|
325
|
+
/**
|
|
326
|
+
* Register a dense storage struct as in-use to avoid garbage collection of the
|
|
327
|
+
* elements stored.
|
|
328
|
+
*
|
|
329
|
+
* This function will check dtype and ignore non-object dtype, so its safe to pass any dense storage in.
|
|
330
|
+
*
|
|
331
|
+
*/
|
|
332
|
+
void nm_dense_storage_register(const STORAGE* s) {
|
|
333
|
+
const DENSE_STORAGE* storage = reinterpret_cast<const DENSE_STORAGE*>(s);
|
|
334
|
+
if (storage->dtype == nm::RUBYOBJ && storage->elements) {
|
|
335
|
+
nm_register_values(reinterpret_cast<VALUE*>(storage->elements), nm_storage_count_max_elements(storage));
|
|
336
|
+
}
|
|
337
|
+
}
|
|
338
|
+
|
|
339
|
+
/**
|
|
340
|
+
* Unregister a dense storage struct to allow normal garbage collection of the
|
|
341
|
+
* elements stored.
|
|
342
|
+
*
|
|
343
|
+
* This function will check dtype and ignore non-object dtype, so its safe to pass any dense storage in.
|
|
344
|
+
*
|
|
345
|
+
*/
|
|
346
|
+
void nm_dense_storage_unregister(const STORAGE* s) {
|
|
347
|
+
const DENSE_STORAGE* storage = reinterpret_cast<const DENSE_STORAGE*>(s);
|
|
348
|
+
if (storage->dtype == nm::RUBYOBJ && storage->elements) {
|
|
349
|
+
nm_unregister_values(reinterpret_cast<VALUE*>(storage->elements), nm_storage_count_max_elements(storage));
|
|
350
|
+
}
|
|
351
|
+
}
|
|
352
|
+
|
|
353
|
+
///////////////
|
|
354
|
+
// Accessors //
|
|
355
|
+
///////////////
|
|
356
|
+
|
|
357
|
+
|
|
358
|
+
|
|
359
|
+
/*
|
|
360
|
+
* map_pair iterator for dense matrices (for element-wise operations)
|
|
361
|
+
*/
|
|
362
|
+
VALUE nm_dense_map_pair(VALUE self, VALUE right) {
|
|
363
|
+
|
|
364
|
+
NM_CONSERVATIVE(nm_register_value(&self));
|
|
365
|
+
NM_CONSERVATIVE(nm_register_value(&right));
|
|
366
|
+
|
|
367
|
+
RETURN_SIZED_ENUMERATOR_PRE
|
|
368
|
+
NM_CONSERVATIVE(nm_unregister_value(&right));
|
|
369
|
+
NM_CONSERVATIVE(nm_unregister_value(&self));
|
|
370
|
+
RETURN_SIZED_ENUMERATOR(self, 0, 0, nm_enumerator_length);
|
|
371
|
+
|
|
372
|
+
DENSE_STORAGE *s = NM_STORAGE_DENSE(self),
|
|
373
|
+
*t = NM_STORAGE_DENSE(right);
|
|
374
|
+
|
|
375
|
+
size_t* coords = NM_ALLOCA_N(size_t, s->dim);
|
|
376
|
+
memset(coords, 0, sizeof(size_t) * s->dim);
|
|
377
|
+
|
|
378
|
+
size_t *shape_copy = NM_ALLOC_N(size_t, s->dim);
|
|
379
|
+
memcpy(shape_copy, s->shape, sizeof(size_t) * s->dim);
|
|
380
|
+
|
|
381
|
+
size_t count = nm_storage_count_max_elements(s);
|
|
382
|
+
|
|
383
|
+
DENSE_STORAGE* result = nm_dense_storage_create(nm::RUBYOBJ, shape_copy, s->dim, NULL, 0);
|
|
384
|
+
|
|
385
|
+
VALUE* result_elem = reinterpret_cast<VALUE*>(result->elements);
|
|
386
|
+
nm_dense_storage_register(result);
|
|
387
|
+
|
|
388
|
+
for (size_t k = 0; k < count; ++k) {
|
|
389
|
+
nm_dense_storage_coords(result, k, coords);
|
|
390
|
+
size_t s_index = nm_dense_storage_pos(s, coords),
|
|
391
|
+
t_index = nm_dense_storage_pos(t, coords);
|
|
392
|
+
|
|
393
|
+
VALUE sval = NM_DTYPE(self) == nm::RUBYOBJ ? reinterpret_cast<VALUE*>(s->elements)[s_index] : nm::rubyobj_from_cval((char*)(s->elements) + s_index*DTYPE_SIZES[NM_DTYPE(self)], NM_DTYPE(self)).rval;
|
|
394
|
+
nm_register_value(&sval);
|
|
395
|
+
VALUE tval = NM_DTYPE(right) == nm::RUBYOBJ ? reinterpret_cast<VALUE*>(t->elements)[t_index] : nm::rubyobj_from_cval((char*)(t->elements) + t_index*DTYPE_SIZES[NM_DTYPE(right)], NM_DTYPE(right)).rval;
|
|
396
|
+
result_elem[k] = rb_yield_values(2, sval, tval);
|
|
397
|
+
nm_unregister_value(&sval);
|
|
398
|
+
}
|
|
399
|
+
|
|
400
|
+
VALUE klass = CLASS_OF(self);
|
|
401
|
+
NMATRIX* m = nm_create(nm::DENSE_STORE, reinterpret_cast<STORAGE*>(result));
|
|
402
|
+
nm_register_nmatrix(m);
|
|
403
|
+
VALUE to_return = Data_Wrap_Struct(klass, nm_mark, nm_delete, m);
|
|
404
|
+
|
|
405
|
+
nm_unregister_nmatrix(m);
|
|
406
|
+
nm_dense_storage_unregister(result);
|
|
407
|
+
NM_CONSERVATIVE(nm_unregister_value(&self));
|
|
408
|
+
NM_CONSERVATIVE(nm_unregister_value(&right));
|
|
409
|
+
|
|
410
|
+
return to_return;
|
|
411
|
+
|
|
412
|
+
}
|
|
413
|
+
|
|
414
|
+
/*
|
|
415
|
+
* map enumerator for dense matrices.
|
|
416
|
+
*/
|
|
417
|
+
VALUE nm_dense_map(VALUE self) {
|
|
418
|
+
|
|
419
|
+
NM_CONSERVATIVE(nm_register_value(&self));
|
|
420
|
+
|
|
421
|
+
RETURN_SIZED_ENUMERATOR_PRE
|
|
422
|
+
NM_CONSERVATIVE(nm_unregister_value(&self));
|
|
423
|
+
RETURN_SIZED_ENUMERATOR(self, 0, 0, nm_enumerator_length);
|
|
424
|
+
|
|
425
|
+
DENSE_STORAGE *s = NM_STORAGE_DENSE(self);
|
|
426
|
+
|
|
427
|
+
size_t* coords = NM_ALLOCA_N(size_t, s->dim);
|
|
428
|
+
memset(coords, 0, sizeof(size_t) * s->dim);
|
|
429
|
+
|
|
430
|
+
size_t *shape_copy = NM_ALLOC_N(size_t, s->dim);
|
|
431
|
+
memcpy(shape_copy, s->shape, sizeof(size_t) * s->dim);
|
|
432
|
+
|
|
433
|
+
size_t count = nm_storage_count_max_elements(s);
|
|
434
|
+
|
|
435
|
+
DENSE_STORAGE* result = nm_dense_storage_create(nm::RUBYOBJ, shape_copy, s->dim, NULL, 0);
|
|
436
|
+
|
|
437
|
+
VALUE* result_elem = reinterpret_cast<VALUE*>(result->elements);
|
|
438
|
+
|
|
439
|
+
nm_dense_storage_register(result);
|
|
440
|
+
|
|
441
|
+
for (size_t k = 0; k < count; ++k) {
|
|
442
|
+
nm_dense_storage_coords(result, k, coords);
|
|
443
|
+
size_t s_index = nm_dense_storage_pos(s, coords);
|
|
444
|
+
|
|
445
|
+
result_elem[k] = rb_yield(NM_DTYPE(self) == nm::RUBYOBJ ? reinterpret_cast<VALUE*>(s->elements)[s_index] : nm::rubyobj_from_cval((char*)(s->elements) + s_index*DTYPE_SIZES[NM_DTYPE(self)], NM_DTYPE(self)).rval);
|
|
446
|
+
}
|
|
447
|
+
|
|
448
|
+
VALUE klass = CLASS_OF(self);
|
|
449
|
+
|
|
450
|
+
NMATRIX* m = nm_create(nm::DENSE_STORE, reinterpret_cast<STORAGE*>(result));
|
|
451
|
+
nm_register_nmatrix(m);
|
|
452
|
+
|
|
453
|
+
VALUE to_return = Data_Wrap_Struct(klass, nm_mark, nm_delete, m);
|
|
454
|
+
|
|
455
|
+
nm_unregister_nmatrix(m);
|
|
456
|
+
nm_dense_storage_unregister(result);
|
|
457
|
+
NM_CONSERVATIVE(nm_unregister_value(&self));
|
|
458
|
+
|
|
459
|
+
return to_return;
|
|
460
|
+
}
|
|
461
|
+
|
|
462
|
+
|
|
463
|
+
/*
|
|
464
|
+
* each_with_indices iterator for dense matrices.
|
|
465
|
+
*/
|
|
466
|
+
VALUE nm_dense_each_with_indices(VALUE nmatrix) {
|
|
467
|
+
|
|
468
|
+
NM_CONSERVATIVE(nm_register_value(&nmatrix));
|
|
469
|
+
|
|
470
|
+
RETURN_SIZED_ENUMERATOR_PRE
|
|
471
|
+
NM_CONSERVATIVE(nm_unregister_value(&nmatrix));
|
|
472
|
+
RETURN_SIZED_ENUMERATOR(nmatrix, 0, 0, nm_enumerator_length); // fourth argument only used by Ruby2+
|
|
473
|
+
DENSE_STORAGE* s = NM_STORAGE_DENSE(nmatrix);
|
|
474
|
+
|
|
475
|
+
// Create indices and initialize them to zero
|
|
476
|
+
size_t* coords = NM_ALLOCA_N(size_t, s->dim);
|
|
477
|
+
memset(coords, 0, sizeof(size_t) * s->dim);
|
|
478
|
+
|
|
479
|
+
size_t slice_index;
|
|
480
|
+
size_t* shape_copy = NM_ALLOC_N(size_t, s->dim);
|
|
481
|
+
memcpy(shape_copy, s->shape, sizeof(size_t) * s->dim);
|
|
482
|
+
|
|
483
|
+
DENSE_STORAGE* sliced_dummy = nm_dense_storage_create_dummy(s->dtype, shape_copy, s->dim);
|
|
484
|
+
|
|
485
|
+
for (size_t k = 0; k < nm_storage_count_max_elements(s); ++k) {
|
|
486
|
+
nm_dense_storage_coords(sliced_dummy, k, coords);
|
|
487
|
+
slice_index = nm_dense_storage_pos(s, coords);
|
|
488
|
+
VALUE ary = rb_ary_new();
|
|
489
|
+
nm_register_value(&ary);
|
|
490
|
+
if (NM_DTYPE(nmatrix) == nm::RUBYOBJ) rb_ary_push(ary, reinterpret_cast<VALUE*>(s->elements)[slice_index]);
|
|
491
|
+
else rb_ary_push(ary, nm::rubyobj_from_cval((char*)(s->elements) + slice_index*DTYPE_SIZES[NM_DTYPE(nmatrix)], NM_DTYPE(nmatrix)).rval);
|
|
492
|
+
|
|
493
|
+
for (size_t p = 0; p < s->dim; ++p) {
|
|
494
|
+
rb_ary_push(ary, INT2FIX(coords[p]));
|
|
495
|
+
}
|
|
496
|
+
|
|
497
|
+
// yield the array which now consists of the value and the indices
|
|
498
|
+
rb_yield(ary);
|
|
499
|
+
nm_unregister_value(&ary);
|
|
500
|
+
}
|
|
501
|
+
|
|
502
|
+
nm_dense_storage_delete(sliced_dummy);
|
|
503
|
+
|
|
504
|
+
NM_CONSERVATIVE(nm_unregister_value(&nmatrix));
|
|
505
|
+
|
|
506
|
+
return nmatrix;
|
|
507
|
+
|
|
508
|
+
}
|
|
509
|
+
|
|
510
|
+
|
|
511
|
+
/*
|
|
512
|
+
* Borrowed this function from NArray. Handles 'each' iteration on a dense
|
|
513
|
+
* matrix.
|
|
514
|
+
*
|
|
515
|
+
* Additionally, handles separately matrices containing VALUEs and matrices
|
|
516
|
+
* containing other types of data.
|
|
517
|
+
*/
|
|
518
|
+
VALUE nm_dense_each(VALUE nmatrix) {
|
|
519
|
+
|
|
520
|
+
NM_CONSERVATIVE(nm_register_value(&nmatrix));
|
|
521
|
+
|
|
522
|
+
RETURN_SIZED_ENUMERATOR_PRE
|
|
523
|
+
NM_CONSERVATIVE(nm_unregister_value(&nmatrix));
|
|
524
|
+
RETURN_SIZED_ENUMERATOR(nmatrix, 0, 0, nm_enumerator_length);
|
|
525
|
+
|
|
526
|
+
DENSE_STORAGE* s = NM_STORAGE_DENSE(nmatrix);
|
|
527
|
+
|
|
528
|
+
size_t* temp_coords = NM_ALLOCA_N(size_t, s->dim);
|
|
529
|
+
size_t sliced_index;
|
|
530
|
+
size_t* shape_copy = NM_ALLOC_N(size_t, s->dim);
|
|
531
|
+
memcpy(shape_copy, s->shape, sizeof(size_t) * s->dim);
|
|
532
|
+
DENSE_STORAGE* sliced_dummy = nm_dense_storage_create_dummy(s->dtype, shape_copy, s->dim);
|
|
533
|
+
|
|
534
|
+
if (NM_DTYPE(nmatrix) == nm::RUBYOBJ) {
|
|
535
|
+
|
|
536
|
+
// matrix of Ruby objects -- yield those objects directly
|
|
537
|
+
for (size_t i = 0; i < nm_storage_count_max_elements(s); ++i) {
|
|
538
|
+
nm_dense_storage_coords(sliced_dummy, i, temp_coords);
|
|
539
|
+
sliced_index = nm_dense_storage_pos(s, temp_coords);
|
|
540
|
+
rb_yield( reinterpret_cast<VALUE*>(s->elements)[sliced_index] );
|
|
541
|
+
}
|
|
542
|
+
|
|
543
|
+
} else {
|
|
544
|
+
|
|
545
|
+
// We're going to copy the matrix element into a Ruby VALUE and then operate on it. This way user can't accidentally
|
|
546
|
+
// modify it and cause a seg fault.
|
|
547
|
+
for (size_t i = 0; i < nm_storage_count_max_elements(s); ++i) {
|
|
548
|
+
nm_dense_storage_coords(sliced_dummy, i, temp_coords);
|
|
549
|
+
sliced_index = nm_dense_storage_pos(s, temp_coords);
|
|
550
|
+
VALUE v = nm::rubyobj_from_cval((char*)(s->elements) + sliced_index*DTYPE_SIZES[NM_DTYPE(nmatrix)], NM_DTYPE(nmatrix)).rval;
|
|
551
|
+
rb_yield( v ); // yield to the copy we made
|
|
552
|
+
}
|
|
553
|
+
}
|
|
554
|
+
|
|
555
|
+
nm_dense_storage_delete(sliced_dummy);
|
|
556
|
+
NM_CONSERVATIVE(nm_unregister_value(&nmatrix));
|
|
557
|
+
|
|
558
|
+
return nmatrix;
|
|
559
|
+
|
|
560
|
+
}
|
|
561
|
+
|
|
562
|
+
|
|
563
|
+
/*
|
|
564
|
+
* Non-templated version of nm::dense_storage::slice_copy
|
|
565
|
+
*/
|
|
566
|
+
static void slice_copy(DENSE_STORAGE *dest, const DENSE_STORAGE *src, size_t* lengths, size_t pdest, size_t psrc, size_t n) {
|
|
567
|
+
NAMED_LR_DTYPE_TEMPLATE_TABLE(slice_copy_table, nm::dense_storage::slice_copy, void, DENSE_STORAGE*, const DENSE_STORAGE*, size_t*, size_t, size_t, size_t)
|
|
568
|
+
|
|
569
|
+
slice_copy_table[dest->dtype][src->dtype](dest, src, lengths, pdest, psrc, n);
|
|
570
|
+
}
|
|
571
|
+
|
|
572
|
+
|
|
573
|
+
/*
|
|
574
|
+
* Get a slice or one element, using copying.
|
|
575
|
+
*
|
|
576
|
+
* FIXME: Template the first condition.
|
|
577
|
+
*/
|
|
578
|
+
void* nm_dense_storage_get(const STORAGE* storage, SLICE* slice) {
|
|
579
|
+
DENSE_STORAGE* s = (DENSE_STORAGE*)storage;
|
|
580
|
+
if (slice->single)
|
|
581
|
+
return (char*)(s->elements) + nm_dense_storage_pos(s, slice->coords) * DTYPE_SIZES[s->dtype];
|
|
582
|
+
else {
|
|
583
|
+
nm_dense_storage_register(s);
|
|
584
|
+
size_t *shape = NM_ALLOC_N(size_t, s->dim);
|
|
585
|
+
for (size_t i = 0; i < s->dim; ++i) {
|
|
586
|
+
shape[i] = slice->lengths[i];
|
|
587
|
+
}
|
|
588
|
+
|
|
589
|
+
DENSE_STORAGE* ns = nm_dense_storage_create(s->dtype, shape, s->dim, NULL, 0);
|
|
590
|
+
|
|
591
|
+
slice_copy(ns,
|
|
592
|
+
reinterpret_cast<const DENSE_STORAGE*>(s->src),
|
|
593
|
+
slice->lengths,
|
|
594
|
+
0,
|
|
595
|
+
nm_dense_storage_pos(s, slice->coords),
|
|
596
|
+
0);
|
|
597
|
+
|
|
598
|
+
nm_dense_storage_unregister(s);
|
|
599
|
+
return ns;
|
|
600
|
+
}
|
|
601
|
+
}
|
|
602
|
+
|
|
603
|
+
/*
|
|
604
|
+
* Get a slice or one element by reference (no copy).
|
|
605
|
+
*
|
|
606
|
+
* FIXME: Template the first condition.
|
|
607
|
+
*/
|
|
608
|
+
void* nm_dense_storage_ref(const STORAGE* storage, SLICE* slice) {
|
|
609
|
+
DENSE_STORAGE* s = (DENSE_STORAGE*)storage;
|
|
610
|
+
|
|
611
|
+
if (slice->single)
|
|
612
|
+
return (char*)(s->elements) + nm_dense_storage_pos(s, slice->coords) * DTYPE_SIZES[s->dtype];
|
|
613
|
+
|
|
614
|
+
else {
|
|
615
|
+
nm_dense_storage_register(s);
|
|
616
|
+
DENSE_STORAGE* ns = NM_ALLOC( DENSE_STORAGE );
|
|
617
|
+
ns->dim = s->dim;
|
|
618
|
+
ns->dtype = s->dtype;
|
|
619
|
+
ns->offset = NM_ALLOC_N(size_t, ns->dim);
|
|
620
|
+
ns->shape = NM_ALLOC_N(size_t, ns->dim);
|
|
621
|
+
|
|
622
|
+
for (size_t i = 0; i < ns->dim; ++i) {
|
|
623
|
+
ns->offset[i] = slice->coords[i] + s->offset[i];
|
|
624
|
+
ns->shape[i] = slice->lengths[i];
|
|
625
|
+
}
|
|
626
|
+
|
|
627
|
+
ns->stride = s->stride;
|
|
628
|
+
ns->elements = s->elements;
|
|
629
|
+
|
|
630
|
+
s->src->count++;
|
|
631
|
+
ns->src = s->src;
|
|
632
|
+
|
|
633
|
+
nm_dense_storage_unregister(s);
|
|
634
|
+
return ns;
|
|
635
|
+
}
|
|
636
|
+
}
|
|
637
|
+
|
|
638
|
+
|
|
639
|
+
|
|
640
|
+
|
|
641
|
+
/*
|
|
642
|
+
* Set a value or values in a dense matrix. Requires that right be either a single value or an NMatrix (ref or real).
|
|
643
|
+
*/
|
|
644
|
+
void nm_dense_storage_set(VALUE left, SLICE* slice, VALUE right) {
|
|
645
|
+
NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::dense_storage::set, void, VALUE, SLICE*, VALUE)
|
|
646
|
+
nm::dtype_t dtype = NM_DTYPE(left);
|
|
647
|
+
ttable[dtype](left, slice, right);
|
|
648
|
+
}
|
|
649
|
+
|
|
650
|
+
|
|
651
|
+
///////////
|
|
652
|
+
// Tests //
|
|
653
|
+
///////////
|
|
654
|
+
|
|
655
|
+
/*
|
|
656
|
+
* Do these two dense matrices have the same contents?
|
|
657
|
+
*
|
|
658
|
+
* TODO: Test the shape of the two matrices.
|
|
659
|
+
* TODO: See if using memcmp is faster when the left- and right-hand matrices
|
|
660
|
+
* have the same dtype.
|
|
661
|
+
*/
|
|
662
|
+
bool nm_dense_storage_eqeq(const STORAGE* left, const STORAGE* right) {
|
|
663
|
+
LR_DTYPE_TEMPLATE_TABLE(nm::dense_storage::eqeq, bool, const DENSE_STORAGE*, const DENSE_STORAGE*)
|
|
664
|
+
|
|
665
|
+
if (!ttable[left->dtype][right->dtype]) {
|
|
666
|
+
rb_raise(nm_eDataTypeError, "comparison between these dtypes is undefined");
|
|
667
|
+
return false;
|
|
668
|
+
}
|
|
669
|
+
|
|
670
|
+
return ttable[left->dtype][right->dtype]((const DENSE_STORAGE*)left, (const DENSE_STORAGE*)right);
|
|
671
|
+
}
|
|
672
|
+
|
|
673
|
+
/*
|
|
674
|
+
* Test to see if the matrix is Hermitian. If the matrix does not have a
|
|
675
|
+
* dtype of Complex64 or Complex128 this is the same as testing for symmetry.
|
|
676
|
+
*/
|
|
677
|
+
bool nm_dense_storage_is_hermitian(const DENSE_STORAGE* mat, int lda) {
|
|
678
|
+
if (mat->dtype == nm::COMPLEX64) {
|
|
679
|
+
return nm::dense_storage::is_hermitian<nm::Complex64>(mat, lda);
|
|
680
|
+
|
|
681
|
+
} else if (mat->dtype == nm::COMPLEX128) {
|
|
682
|
+
return nm::dense_storage::is_hermitian<nm::Complex128>(mat, lda);
|
|
683
|
+
|
|
684
|
+
} else {
|
|
685
|
+
return nm_dense_storage_is_symmetric(mat, lda);
|
|
686
|
+
}
|
|
687
|
+
}
|
|
688
|
+
|
|
689
|
+
/*
|
|
690
|
+
* Is this dense matrix symmetric about the diagonal?
|
|
691
|
+
*/
|
|
692
|
+
bool nm_dense_storage_is_symmetric(const DENSE_STORAGE* mat, int lda) {
|
|
693
|
+
DTYPE_TEMPLATE_TABLE(nm::dense_storage::is_symmetric, bool, const DENSE_STORAGE*, int);
|
|
694
|
+
|
|
695
|
+
return ttable[mat->dtype](mat, lda);
|
|
696
|
+
}
|
|
697
|
+
|
|
698
|
+
//////////
|
|
699
|
+
// Math //
|
|
700
|
+
//////////
|
|
701
|
+
|
|
702
|
+
|
|
703
|
+
/*
|
|
704
|
+
* Dense matrix-matrix multiplication.
|
|
705
|
+
*/
|
|
706
|
+
STORAGE* nm_dense_storage_matrix_multiply(const STORAGE_PAIR& casted_storage, size_t* resulting_shape, bool vector) {
|
|
707
|
+
DTYPE_TEMPLATE_TABLE(nm::dense_storage::matrix_multiply, DENSE_STORAGE*, const STORAGE_PAIR& casted_storage, size_t* resulting_shape, bool vector);
|
|
708
|
+
|
|
709
|
+
return ttable[casted_storage.left->dtype](casted_storage, resulting_shape, vector);
|
|
710
|
+
}
|
|
711
|
+
|
|
712
|
+
/////////////
|
|
713
|
+
// Utility //
|
|
714
|
+
/////////////
|
|
715
|
+
|
|
716
|
+
/*
|
|
717
|
+
* Determine the linear array position (in elements of s) of some set of coordinates
|
|
718
|
+
* (given by slice).
|
|
719
|
+
*/
|
|
720
|
+
size_t nm_dense_storage_pos(const DENSE_STORAGE* s, const size_t* coords) {
|
|
721
|
+
size_t pos = 0;
|
|
722
|
+
|
|
723
|
+
for (size_t i = 0; i < s->dim; ++i)
|
|
724
|
+
pos += (coords[i] + s->offset[i]) * s->stride[i];
|
|
725
|
+
|
|
726
|
+
return pos;
|
|
727
|
+
|
|
728
|
+
}
|
|
729
|
+
|
|
730
|
+
/*
|
|
731
|
+
* Determine the a set of slice coordinates from linear array position (in elements
|
|
732
|
+
* of s) of some set of coordinates (given by slice). (Inverse of
|
|
733
|
+
* nm_dense_storage_pos).
|
|
734
|
+
*
|
|
735
|
+
* The parameter coords_out should be a pre-allocated array of size equal to s->dim.
|
|
736
|
+
*/
|
|
737
|
+
void nm_dense_storage_coords(const DENSE_STORAGE* s, const size_t slice_pos, size_t* coords_out) {
|
|
738
|
+
|
|
739
|
+
size_t temp_pos = slice_pos;
|
|
740
|
+
|
|
741
|
+
for (size_t i = 0; i < s->dim; ++i) {
|
|
742
|
+
coords_out[i] = (temp_pos - temp_pos % s->stride[i])/s->stride[i] - s->offset[i];
|
|
743
|
+
temp_pos = temp_pos % s->stride[i];
|
|
744
|
+
}
|
|
745
|
+
}
|
|
746
|
+
|
|
747
|
+
/*
|
|
748
|
+
* Calculate the stride length.
|
|
749
|
+
*/
|
|
750
|
+
static size_t* stride(size_t* shape, size_t dim) {
|
|
751
|
+
size_t i, j;
|
|
752
|
+
size_t* stride = NM_ALLOC_N(size_t, dim);
|
|
753
|
+
|
|
754
|
+
for (i = 0; i < dim; ++i) {
|
|
755
|
+
stride[i] = 1;
|
|
756
|
+
for (j = i+1; j < dim; ++j) {
|
|
757
|
+
stride[i] *= shape[j];
|
|
758
|
+
}
|
|
759
|
+
}
|
|
760
|
+
|
|
761
|
+
return stride;
|
|
762
|
+
}
|
|
763
|
+
|
|
764
|
+
|
|
765
|
+
/////////////////////////
|
|
766
|
+
// Copying and Casting //
|
|
767
|
+
/////////////////////////
|
|
768
|
+
|
|
769
|
+
/*
|
|
770
|
+
* Copy dense storage, changing dtype if necessary.
|
|
771
|
+
*/
|
|
772
|
+
STORAGE* nm_dense_storage_cast_copy(const STORAGE* rhs, nm::dtype_t new_dtype, void* dummy) {
|
|
773
|
+
NAMED_LR_DTYPE_TEMPLATE_TABLE(ttable, nm::dense_storage::cast_copy, DENSE_STORAGE*, const DENSE_STORAGE* rhs, nm::dtype_t new_dtype);
|
|
774
|
+
|
|
775
|
+
if (!ttable[new_dtype][rhs->dtype]) {
|
|
776
|
+
rb_raise(nm_eDataTypeError, "cast between these dtypes is undefined");
|
|
777
|
+
return NULL;
|
|
778
|
+
}
|
|
779
|
+
|
|
780
|
+
return (STORAGE*)ttable[new_dtype][rhs->dtype]((DENSE_STORAGE*)rhs, new_dtype);
|
|
781
|
+
}
|
|
782
|
+
|
|
783
|
+
/*
|
|
784
|
+
* Copy dense storage without a change in dtype.
|
|
785
|
+
*/
|
|
786
|
+
DENSE_STORAGE* nm_dense_storage_copy(const DENSE_STORAGE* rhs) {
|
|
787
|
+
nm_dense_storage_register(rhs);
|
|
788
|
+
|
|
789
|
+
size_t count = 0;
|
|
790
|
+
size_t *shape = NM_ALLOC_N(size_t, rhs->dim);
|
|
791
|
+
|
|
792
|
+
// copy shape and offset
|
|
793
|
+
for (size_t i = 0; i < rhs->dim; ++i) {
|
|
794
|
+
shape[i] = rhs->shape[i];
|
|
795
|
+
}
|
|
796
|
+
|
|
797
|
+
DENSE_STORAGE* lhs = nm_dense_storage_create(rhs->dtype, shape, rhs->dim, NULL, 0);
|
|
798
|
+
count = nm_storage_count_max_elements(lhs);
|
|
799
|
+
|
|
800
|
+
|
|
801
|
+
// Ensure that allocation worked before copying.
|
|
802
|
+
if (lhs && count) {
|
|
803
|
+
if (rhs == rhs->src) // not a reference
|
|
804
|
+
memcpy(lhs->elements, rhs->elements, DTYPE_SIZES[rhs->dtype] * count);
|
|
805
|
+
else { // slice whole matrix
|
|
806
|
+
nm_dense_storage_register(lhs);
|
|
807
|
+
size_t *offset = NM_ALLOC_N(size_t, rhs->dim);
|
|
808
|
+
memset(offset, 0, sizeof(size_t) * rhs->dim);
|
|
809
|
+
|
|
810
|
+
slice_copy(lhs,
|
|
811
|
+
reinterpret_cast<const DENSE_STORAGE*>(rhs->src),
|
|
812
|
+
rhs->shape,
|
|
813
|
+
0,
|
|
814
|
+
nm_dense_storage_pos(rhs, offset),
|
|
815
|
+
0);
|
|
816
|
+
|
|
817
|
+
nm_dense_storage_unregister(lhs);
|
|
818
|
+
}
|
|
819
|
+
}
|
|
820
|
+
|
|
821
|
+
nm_dense_storage_unregister(rhs);
|
|
822
|
+
|
|
823
|
+
return lhs;
|
|
824
|
+
}
|
|
825
|
+
|
|
826
|
+
|
|
827
|
+
/*
|
|
828
|
+
* Transpose dense storage into a new dense storage object. Basically a copy constructor.
|
|
829
|
+
*
|
|
830
|
+
* Not much point in templating this as it's pretty straight-forward.
|
|
831
|
+
*/
|
|
832
|
+
STORAGE* nm_dense_storage_copy_transposed(const STORAGE* rhs_base) {
|
|
833
|
+
DENSE_STORAGE* rhs = (DENSE_STORAGE*)rhs_base;
|
|
834
|
+
|
|
835
|
+
nm_dense_storage_register(rhs);
|
|
836
|
+
size_t *shape = NM_ALLOC_N(size_t, rhs->dim);
|
|
837
|
+
|
|
838
|
+
// swap shape
|
|
839
|
+
shape[0] = rhs->shape[1];
|
|
840
|
+
shape[1] = rhs->shape[0];
|
|
841
|
+
|
|
842
|
+
DENSE_STORAGE *lhs = nm_dense_storage_create(rhs->dtype, shape, rhs->dim, NULL, 0);
|
|
843
|
+
|
|
844
|
+
nm_dense_storage_register(lhs);
|
|
845
|
+
|
|
846
|
+
if (rhs_base->src == rhs_base) {
|
|
847
|
+
nm_math_transpose_generic(rhs->shape[0], rhs->shape[1], rhs->elements, rhs->shape[1], lhs->elements, lhs->shape[1], DTYPE_SIZES[rhs->dtype]);
|
|
848
|
+
} else {
|
|
849
|
+
NAMED_LR_DTYPE_TEMPLATE_TABLE(ttable, nm::dense_storage::ref_slice_copy_transposed, void, const DENSE_STORAGE* rhs, DENSE_STORAGE* lhs);
|
|
850
|
+
|
|
851
|
+
if (!ttable[lhs->dtype][rhs->dtype]) {
|
|
852
|
+
nm_dense_storage_unregister(rhs);
|
|
853
|
+
nm_dense_storage_unregister(lhs);
|
|
854
|
+
rb_raise(nm_eDataTypeError, "transposition between these dtypes is undefined");
|
|
855
|
+
}
|
|
856
|
+
|
|
857
|
+
ttable[lhs->dtype][rhs->dtype](rhs, lhs);
|
|
858
|
+
}
|
|
859
|
+
|
|
860
|
+
nm_dense_storage_unregister(rhs);
|
|
861
|
+
nm_dense_storage_unregister(lhs);
|
|
862
|
+
|
|
863
|
+
return (STORAGE*)lhs;
|
|
864
|
+
}
|
|
865
|
+
|
|
866
|
+
} // end of extern "C" block
|
|
867
|
+
|
|
868
|
+
namespace nm {
|
|
869
|
+
|
|
870
|
+
/*
|
|
871
|
+
* Used for slice setting. Takes the right-hand of the equal sign, a single VALUE, and massages
|
|
872
|
+
* it into the correct form if it's not already there (dtype, non-ref, dense). Returns a pair of the NMATRIX* and a
|
|
873
|
+
* boolean. If the boolean is true, the calling function is responsible for calling nm_delete on the NMATRIX*.
|
|
874
|
+
* Otherwise, the NMATRIX* still belongs to Ruby and Ruby will free it.
|
|
875
|
+
*/
|
|
876
|
+
std::pair<NMATRIX*,bool> interpret_arg_as_dense_nmatrix(VALUE right, nm::dtype_t dtype) {
|
|
877
|
+
NM_CONSERVATIVE(nm_register_value(&right));
|
|
878
|
+
if (IsNMatrixType(right)) {
|
|
879
|
+
NMATRIX *r;
|
|
880
|
+
if (NM_STYPE(right) != DENSE_STORE || NM_DTYPE(right) != dtype || NM_SRC(right) != NM_STORAGE(right)) {
|
|
881
|
+
UnwrapNMatrix( right, r );
|
|
882
|
+
NMATRIX* ldtype_r = nm_cast_with_ctype_args(r, nm::DENSE_STORE, dtype, NULL);
|
|
883
|
+
NM_CONSERVATIVE(nm_unregister_value(&right));
|
|
884
|
+
return std::make_pair(ldtype_r,true);
|
|
885
|
+
} else { // simple case -- right-hand matrix is dense and is not a reference and has same dtype
|
|
886
|
+
UnwrapNMatrix( right, r );
|
|
887
|
+
NM_CONSERVATIVE(nm_unregister_value(&right));
|
|
888
|
+
return std::make_pair(r, false);
|
|
889
|
+
}
|
|
890
|
+
// Do not set v_alloc = true for either of these. It is the responsibility of r/ldtype_r
|
|
891
|
+
} else if (RB_TYPE_P(right, T_DATA)) {
|
|
892
|
+
NM_CONSERVATIVE(nm_unregister_value(&right));
|
|
893
|
+
rb_raise(rb_eTypeError, "unrecognized type for slice assignment");
|
|
894
|
+
}
|
|
895
|
+
|
|
896
|
+
NM_CONSERVATIVE(nm_unregister_value(&right));
|
|
897
|
+
return std::make_pair<NMATRIX*,bool>(NULL, false);
|
|
898
|
+
}
|
|
899
|
+
|
|
900
|
+
|
|
901
|
+
namespace dense_storage {
|
|
902
|
+
|
|
903
|
+
/////////////////////////
|
|
904
|
+
// Templated Functions //
|
|
905
|
+
/////////////////////////
|
|
906
|
+
|
|
907
|
+
template<typename LDType, typename RDType>
|
|
908
|
+
void ref_slice_copy_transposed(const DENSE_STORAGE* rhs, DENSE_STORAGE* lhs) {
|
|
909
|
+
|
|
910
|
+
nm_dense_storage_register(rhs);
|
|
911
|
+
nm_dense_storage_register(lhs);
|
|
912
|
+
|
|
913
|
+
LDType* lhs_els = reinterpret_cast<LDType*>(lhs->elements);
|
|
914
|
+
RDType* rhs_els = reinterpret_cast<RDType*>(rhs->elements);
|
|
915
|
+
size_t count = nm_storage_count_max_elements(lhs);;
|
|
916
|
+
size_t* temp_coords = NM_ALLOCA_N(size_t, lhs->dim);
|
|
917
|
+
size_t coord_swap_temp;
|
|
918
|
+
|
|
919
|
+
while (count-- > 0) {
|
|
920
|
+
nm_dense_storage_coords(lhs, count, temp_coords);
|
|
921
|
+
NM_SWAP(temp_coords[0], temp_coords[1], coord_swap_temp);
|
|
922
|
+
size_t r_coord = nm_dense_storage_pos(rhs, temp_coords);
|
|
923
|
+
lhs_els[count] = rhs_els[r_coord];
|
|
924
|
+
}
|
|
925
|
+
|
|
926
|
+
nm_dense_storage_unregister(rhs);
|
|
927
|
+
nm_dense_storage_unregister(lhs);
|
|
928
|
+
|
|
929
|
+
}
|
|
930
|
+
|
|
931
|
+
template <typename LDType, typename RDType>
|
|
932
|
+
DENSE_STORAGE* cast_copy(const DENSE_STORAGE* rhs, dtype_t new_dtype) {
|
|
933
|
+
nm_dense_storage_register(rhs);
|
|
934
|
+
|
|
935
|
+
size_t count = nm_storage_count_max_elements(rhs);
|
|
936
|
+
|
|
937
|
+
size_t *shape = NM_ALLOC_N(size_t, rhs->dim);
|
|
938
|
+
memcpy(shape, rhs->shape, sizeof(size_t) * rhs->dim);
|
|
939
|
+
|
|
940
|
+
DENSE_STORAGE* lhs = nm_dense_storage_create(new_dtype, shape, rhs->dim, NULL, 0);
|
|
941
|
+
|
|
942
|
+
nm_dense_storage_register(lhs);
|
|
943
|
+
|
|
944
|
+
// Ensure that allocation worked before copying.
|
|
945
|
+
if (lhs && count) {
|
|
946
|
+
if (rhs->src != rhs) { // Make a copy of a ref to a matrix.
|
|
947
|
+
size_t* offset = NM_ALLOCA_N(size_t, rhs->dim);
|
|
948
|
+
memset(offset, 0, sizeof(size_t) * rhs->dim);
|
|
949
|
+
|
|
950
|
+
slice_copy(lhs, reinterpret_cast<const DENSE_STORAGE*>(rhs->src),
|
|
951
|
+
rhs->shape, 0,
|
|
952
|
+
nm_dense_storage_pos(rhs, offset), 0);
|
|
953
|
+
|
|
954
|
+
} else { // Make a regular copy.
|
|
955
|
+
RDType* rhs_els = reinterpret_cast<RDType*>(rhs->elements);
|
|
956
|
+
LDType* lhs_els = reinterpret_cast<LDType*>(lhs->elements);
|
|
957
|
+
|
|
958
|
+
for (size_t i = 0; i < count; ++i)
|
|
959
|
+
lhs_els[i] = rhs_els[i];
|
|
960
|
+
}
|
|
961
|
+
}
|
|
962
|
+
|
|
963
|
+
nm_dense_storage_unregister(rhs);
|
|
964
|
+
nm_dense_storage_unregister(lhs);
|
|
965
|
+
|
|
966
|
+
return lhs;
|
|
967
|
+
}
|
|
968
|
+
|
|
969
|
+
template <typename LDType, typename RDType>
|
|
970
|
+
bool eqeq(const DENSE_STORAGE* left, const DENSE_STORAGE* right) {
|
|
971
|
+
nm_dense_storage_register(left);
|
|
972
|
+
nm_dense_storage_register(right);
|
|
973
|
+
|
|
974
|
+
size_t index;
|
|
975
|
+
DENSE_STORAGE *tmp1, *tmp2;
|
|
976
|
+
tmp1 = NULL; tmp2 = NULL;
|
|
977
|
+
bool result = true;
|
|
978
|
+
/* FIXME: Very strange behavior! The GC calls the method directly with non-initialized data. */
|
|
979
|
+
|
|
980
|
+
LDType* left_elements = (LDType*)left->elements;
|
|
981
|
+
RDType* right_elements = (RDType*)right->elements;
|
|
982
|
+
|
|
983
|
+
// Copy elements in temp matrix if you have reference to the right.
|
|
984
|
+
if (left->src != left) {
|
|
985
|
+
tmp1 = nm_dense_storage_copy(left);
|
|
986
|
+
nm_dense_storage_register(tmp1);
|
|
987
|
+
left_elements = (LDType*)tmp1->elements;
|
|
988
|
+
}
|
|
989
|
+
if (right->src != right) {
|
|
990
|
+
tmp2 = nm_dense_storage_copy(right);
|
|
991
|
+
nm_dense_storage_register(tmp2);
|
|
992
|
+
right_elements = (RDType*)tmp2->elements;
|
|
993
|
+
}
|
|
994
|
+
|
|
995
|
+
|
|
996
|
+
|
|
997
|
+
for (index = nm_storage_count_max_elements(left); index-- > 0;) {
|
|
998
|
+
if (left_elements[index] != right_elements[index]) {
|
|
999
|
+
result = false;
|
|
1000
|
+
break;
|
|
1001
|
+
}
|
|
1002
|
+
}
|
|
1003
|
+
|
|
1004
|
+
if (tmp1) {
|
|
1005
|
+
nm_dense_storage_unregister(tmp1);
|
|
1006
|
+
NM_FREE(tmp1);
|
|
1007
|
+
}
|
|
1008
|
+
if (tmp2) {
|
|
1009
|
+
nm_dense_storage_unregister(tmp2);
|
|
1010
|
+
NM_FREE(tmp2);
|
|
1011
|
+
}
|
|
1012
|
+
|
|
1013
|
+
nm_dense_storage_unregister(left);
|
|
1014
|
+
nm_dense_storage_unregister(right);
|
|
1015
|
+
return result;
|
|
1016
|
+
}
|
|
1017
|
+
|
|
1018
|
+
template <typename DType>
|
|
1019
|
+
bool is_hermitian(const DENSE_STORAGE* mat, int lda) {
|
|
1020
|
+
unsigned int i, j;
|
|
1021
|
+
DType complex_conj;
|
|
1022
|
+
|
|
1023
|
+
const DType* els = (DType*) mat->elements;
|
|
1024
|
+
|
|
1025
|
+
for (i = mat->shape[0]; i-- > 0;) {
|
|
1026
|
+
for (j = i + 1; j < mat->shape[1]; ++j) {
|
|
1027
|
+
complex_conj = els[j*lda + i];
|
|
1028
|
+
complex_conj.i = -complex_conj.i;
|
|
1029
|
+
|
|
1030
|
+
if (els[i*lda+j] != complex_conj) {
|
|
1031
|
+
return false;
|
|
1032
|
+
}
|
|
1033
|
+
}
|
|
1034
|
+
}
|
|
1035
|
+
|
|
1036
|
+
return true;
|
|
1037
|
+
}
|
|
1038
|
+
|
|
1039
|
+
template <typename DType>
|
|
1040
|
+
bool is_symmetric(const DENSE_STORAGE* mat, int lda) {
|
|
1041
|
+
unsigned int i, j;
|
|
1042
|
+
const DType* els = (DType*) mat->elements;
|
|
1043
|
+
|
|
1044
|
+
for (i = mat->shape[0]; i-- > 0;) {
|
|
1045
|
+
for (j = i + 1; j < mat->shape[1]; ++j) {
|
|
1046
|
+
if (els[i*lda+j] != els[j*lda+i]) {
|
|
1047
|
+
return false;
|
|
1048
|
+
}
|
|
1049
|
+
}
|
|
1050
|
+
}
|
|
1051
|
+
|
|
1052
|
+
return true;
|
|
1053
|
+
}
|
|
1054
|
+
|
|
1055
|
+
|
|
1056
|
+
|
|
1057
|
+
/*
|
|
1058
|
+
* DType-templated matrix-matrix multiplication for dense storage.
|
|
1059
|
+
*/
|
|
1060
|
+
template <typename DType>
|
|
1061
|
+
static DENSE_STORAGE* matrix_multiply(const STORAGE_PAIR& casted_storage, size_t* resulting_shape, bool vector) {
|
|
1062
|
+
DENSE_STORAGE *left = (DENSE_STORAGE*)(casted_storage.left),
|
|
1063
|
+
*right = (DENSE_STORAGE*)(casted_storage.right);
|
|
1064
|
+
|
|
1065
|
+
nm_dense_storage_register(left);
|
|
1066
|
+
nm_dense_storage_register(right);
|
|
1067
|
+
|
|
1068
|
+
// Create result storage.
|
|
1069
|
+
DENSE_STORAGE* result = nm_dense_storage_create(left->dtype, resulting_shape, 2, NULL, 0);
|
|
1070
|
+
|
|
1071
|
+
nm_dense_storage_register(result);
|
|
1072
|
+
|
|
1073
|
+
DType *pAlpha = NM_ALLOCA_N(DType, 1),
|
|
1074
|
+
*pBeta = NM_ALLOCA_N(DType, 1);
|
|
1075
|
+
|
|
1076
|
+
*pAlpha = 1;
|
|
1077
|
+
*pBeta = 0;
|
|
1078
|
+
// Do the multiplication
|
|
1079
|
+
if (vector) nm::math::gemv<DType>(CblasNoTrans, left->shape[0], left->shape[1], pAlpha,
|
|
1080
|
+
reinterpret_cast<DType*>(left->elements), left->shape[1],
|
|
1081
|
+
reinterpret_cast<DType*>(right->elements), 1, pBeta,
|
|
1082
|
+
reinterpret_cast<DType*>(result->elements), 1);
|
|
1083
|
+
else nm::math::gemm<DType>(CblasRowMajor, CblasNoTrans, CblasNoTrans, left->shape[0], right->shape[1], left->shape[1],
|
|
1084
|
+
pAlpha, reinterpret_cast<DType*>(left->elements), left->shape[1],
|
|
1085
|
+
reinterpret_cast<DType*>(right->elements), right->shape[1], pBeta,
|
|
1086
|
+
reinterpret_cast<DType*>(result->elements), result->shape[1]);
|
|
1087
|
+
|
|
1088
|
+
|
|
1089
|
+
nm_dense_storage_unregister(left);
|
|
1090
|
+
nm_dense_storage_unregister(right);
|
|
1091
|
+
nm_dense_storage_unregister(result);
|
|
1092
|
+
|
|
1093
|
+
return result;
|
|
1094
|
+
}
|
|
1095
|
+
|
|
1096
|
+
}} // end of namespace nm::dense_storage
|