pnmatrix 1.2.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (111) hide show
  1. checksums.yaml +7 -0
  2. data/ext/nmatrix/binary_format.txt +53 -0
  3. data/ext/nmatrix/data/complex.h +388 -0
  4. data/ext/nmatrix/data/data.cpp +274 -0
  5. data/ext/nmatrix/data/data.h +651 -0
  6. data/ext/nmatrix/data/meta.h +64 -0
  7. data/ext/nmatrix/data/ruby_object.h +386 -0
  8. data/ext/nmatrix/extconf.rb +70 -0
  9. data/ext/nmatrix/math/asum.h +99 -0
  10. data/ext/nmatrix/math/cblas_enums.h +36 -0
  11. data/ext/nmatrix/math/cblas_templates_core.h +507 -0
  12. data/ext/nmatrix/math/gemm.h +241 -0
  13. data/ext/nmatrix/math/gemv.h +178 -0
  14. data/ext/nmatrix/math/getrf.h +255 -0
  15. data/ext/nmatrix/math/getrs.h +121 -0
  16. data/ext/nmatrix/math/imax.h +82 -0
  17. data/ext/nmatrix/math/laswp.h +165 -0
  18. data/ext/nmatrix/math/long_dtype.h +62 -0
  19. data/ext/nmatrix/math/magnitude.h +54 -0
  20. data/ext/nmatrix/math/math.h +751 -0
  21. data/ext/nmatrix/math/nrm2.h +165 -0
  22. data/ext/nmatrix/math/rot.h +117 -0
  23. data/ext/nmatrix/math/rotg.h +106 -0
  24. data/ext/nmatrix/math/scal.h +71 -0
  25. data/ext/nmatrix/math/trsm.h +336 -0
  26. data/ext/nmatrix/math/util.h +162 -0
  27. data/ext/nmatrix/math.cpp +1368 -0
  28. data/ext/nmatrix/nm_memory.h +60 -0
  29. data/ext/nmatrix/nmatrix.cpp +285 -0
  30. data/ext/nmatrix/nmatrix.h +476 -0
  31. data/ext/nmatrix/ruby_constants.cpp +151 -0
  32. data/ext/nmatrix/ruby_constants.h +106 -0
  33. data/ext/nmatrix/ruby_nmatrix.c +3130 -0
  34. data/ext/nmatrix/storage/common.cpp +77 -0
  35. data/ext/nmatrix/storage/common.h +183 -0
  36. data/ext/nmatrix/storage/dense/dense.cpp +1096 -0
  37. data/ext/nmatrix/storage/dense/dense.h +129 -0
  38. data/ext/nmatrix/storage/list/list.cpp +1628 -0
  39. data/ext/nmatrix/storage/list/list.h +138 -0
  40. data/ext/nmatrix/storage/storage.cpp +730 -0
  41. data/ext/nmatrix/storage/storage.h +99 -0
  42. data/ext/nmatrix/storage/yale/class.h +1139 -0
  43. data/ext/nmatrix/storage/yale/iterators/base.h +143 -0
  44. data/ext/nmatrix/storage/yale/iterators/iterator.h +131 -0
  45. data/ext/nmatrix/storage/yale/iterators/row.h +450 -0
  46. data/ext/nmatrix/storage/yale/iterators/row_stored.h +140 -0
  47. data/ext/nmatrix/storage/yale/iterators/row_stored_nd.h +169 -0
  48. data/ext/nmatrix/storage/yale/iterators/stored_diagonal.h +124 -0
  49. data/ext/nmatrix/storage/yale/math/transpose.h +110 -0
  50. data/ext/nmatrix/storage/yale/yale.cpp +2074 -0
  51. data/ext/nmatrix/storage/yale/yale.h +203 -0
  52. data/ext/nmatrix/types.h +55 -0
  53. data/ext/nmatrix/util/io.cpp +279 -0
  54. data/ext/nmatrix/util/io.h +115 -0
  55. data/ext/nmatrix/util/sl_list.cpp +627 -0
  56. data/ext/nmatrix/util/sl_list.h +144 -0
  57. data/ext/nmatrix/util/util.h +78 -0
  58. data/lib/nmatrix/blas.rb +378 -0
  59. data/lib/nmatrix/cruby/math.rb +744 -0
  60. data/lib/nmatrix/enumerate.rb +253 -0
  61. data/lib/nmatrix/homogeneous.rb +241 -0
  62. data/lib/nmatrix/io/fortran_format.rb +138 -0
  63. data/lib/nmatrix/io/harwell_boeing.rb +221 -0
  64. data/lib/nmatrix/io/market.rb +263 -0
  65. data/lib/nmatrix/io/point_cloud.rb +189 -0
  66. data/lib/nmatrix/jruby/decomposition.rb +24 -0
  67. data/lib/nmatrix/jruby/enumerable.rb +13 -0
  68. data/lib/nmatrix/jruby/error.rb +4 -0
  69. data/lib/nmatrix/jruby/math.rb +501 -0
  70. data/lib/nmatrix/jruby/nmatrix_java.rb +840 -0
  71. data/lib/nmatrix/jruby/operators.rb +283 -0
  72. data/lib/nmatrix/jruby/slice.rb +264 -0
  73. data/lib/nmatrix/lapack_core.rb +181 -0
  74. data/lib/nmatrix/lapack_plugin.rb +44 -0
  75. data/lib/nmatrix/math.rb +953 -0
  76. data/lib/nmatrix/mkmf.rb +100 -0
  77. data/lib/nmatrix/monkeys.rb +137 -0
  78. data/lib/nmatrix/nmatrix.rb +1172 -0
  79. data/lib/nmatrix/rspec.rb +75 -0
  80. data/lib/nmatrix/shortcuts.rb +1163 -0
  81. data/lib/nmatrix/version.rb +39 -0
  82. data/lib/nmatrix/yale_functions.rb +118 -0
  83. data/lib/nmatrix.rb +28 -0
  84. data/spec/00_nmatrix_spec.rb +892 -0
  85. data/spec/01_enum_spec.rb +196 -0
  86. data/spec/02_slice_spec.rb +407 -0
  87. data/spec/03_nmatrix_monkeys_spec.rb +80 -0
  88. data/spec/2x2_dense_double.mat +0 -0
  89. data/spec/4x4_sparse.mat +0 -0
  90. data/spec/4x5_dense.mat +0 -0
  91. data/spec/blas_spec.rb +215 -0
  92. data/spec/elementwise_spec.rb +311 -0
  93. data/spec/homogeneous_spec.rb +100 -0
  94. data/spec/io/fortran_format_spec.rb +88 -0
  95. data/spec/io/harwell_boeing_spec.rb +98 -0
  96. data/spec/io/test.rua +9 -0
  97. data/spec/io_spec.rb +159 -0
  98. data/spec/lapack_core_spec.rb +482 -0
  99. data/spec/leakcheck.rb +16 -0
  100. data/spec/math_spec.rb +1363 -0
  101. data/spec/nmatrix_yale_resize_test_associations.yaml +2802 -0
  102. data/spec/nmatrix_yale_spec.rb +286 -0
  103. data/spec/rspec_monkeys.rb +56 -0
  104. data/spec/rspec_spec.rb +35 -0
  105. data/spec/shortcuts_spec.rb +474 -0
  106. data/spec/slice_set_spec.rb +162 -0
  107. data/spec/spec_helper.rb +172 -0
  108. data/spec/stat_spec.rb +214 -0
  109. data/spec/test.pcd +20 -0
  110. data/spec/utm5940.mtx +83844 -0
  111. metadata +295 -0
@@ -0,0 +1,1096 @@
1
+ /////////////////////////////////////////////////////////////////////
2
+ // = NMatrix
3
+ //
4
+ // A linear algebra library for scientific computation in Ruby.
5
+ // NMatrix is part of SciRuby.
6
+ //
7
+ // NMatrix was originally inspired by and derived from NArray, by
8
+ // Masahiro Tanaka: http://narray.rubyforge.org
9
+ //
10
+ // == Copyright Information
11
+ //
12
+ // SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
13
+ // NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
14
+ //
15
+ // Please see LICENSE.txt for additional copyright notices.
16
+ //
17
+ // == Contributing
18
+ //
19
+ // By contributing source code to SciRuby, you agree to be bound by
20
+ // our Contributor Agreement:
21
+ //
22
+ // * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement
23
+ //
24
+ // == dense.cpp
25
+ //
26
+ // Dense n-dimensional matrix storage.
27
+
28
+ /*
29
+ * Standard Includes
30
+ */
31
+
32
+ #include <ruby.h>
33
+
34
+ /*
35
+ * Project Includes
36
+ */
37
+ #include "../../data/data.h"
38
+ #include "../../math/long_dtype.h"
39
+ #include "../../math/gemm.h"
40
+ #include "../../math/gemv.h"
41
+ #include "../../math/math.h"
42
+ #include "../common.h"
43
+ #include "dense.h"
44
+
45
+ /*
46
+ * Macros
47
+ */
48
+
49
+ /*
50
+ * Global Variables
51
+ */
52
+
53
+ /*
54
+ * Forward Declarations
55
+ */
56
+
57
+ namespace nm { namespace dense_storage {
58
+
59
+ template<typename LDType, typename RDType>
60
+ void ref_slice_copy_transposed(const DENSE_STORAGE* rhs, DENSE_STORAGE* lhs);
61
+
62
+ template <typename LDType, typename RDType>
63
+ DENSE_STORAGE* cast_copy(const DENSE_STORAGE* rhs, nm::dtype_t new_dtype);
64
+
65
+ template <typename LDType, typename RDType>
66
+ bool eqeq(const DENSE_STORAGE* left, const DENSE_STORAGE* right);
67
+
68
+ template <typename DType>
69
+ static DENSE_STORAGE* matrix_multiply(const STORAGE_PAIR& casted_storage, size_t* resulting_shape, bool vector);
70
+
71
+ template <typename DType>
72
+ bool is_hermitian(const DENSE_STORAGE* mat, int lda);
73
+
74
+ template <typename DType>
75
+ bool is_symmetric(const DENSE_STORAGE* mat, int lda);
76
+
77
+
78
+ /*
79
+ * Recursive slicing for N-dimensional matrix.
80
+ */
81
+ template <typename LDType, typename RDType>
82
+ static void slice_copy(DENSE_STORAGE *dest, const DENSE_STORAGE *src, size_t* lengths, size_t pdest, size_t psrc, size_t n) {
83
+ if (src->dim - n > 1) {
84
+ for (size_t i = 0; i < lengths[n]; ++i) {
85
+ slice_copy<LDType,RDType>(dest, src, lengths,
86
+ pdest + dest->stride[n]*i,
87
+ psrc + src->stride[n]*i,
88
+ n + 1);
89
+ }
90
+ } else {
91
+ for (size_t p = 0; p < dest->shape[n]; ++p) {
92
+ reinterpret_cast<LDType*>(dest->elements)[p+pdest] = reinterpret_cast<RDType*>(src->elements)[p+psrc];
93
+ }
94
+ /*memcpy((char*)dest->elements + pdest*DTYPE_SIZES[dest->dtype],
95
+ (char*)src->elements + psrc*DTYPE_SIZES[src->dtype],
96
+ dest->shape[n]*DTYPE_SIZES[dest->dtype]); */
97
+ }
98
+
99
+ }
100
+
101
+ /*
102
+ * Recursive function, sets multiple values in a matrix from a single source value. Same basic pattern as slice_copy.
103
+ */
104
+ template <typename D>
105
+ static void slice_set(DENSE_STORAGE* dest, size_t* lengths, size_t pdest, size_t rank, D* const v, size_t v_size, size_t& v_offset) {
106
+ if (dest->dim - rank > 1) {
107
+ for (size_t i = 0; i < lengths[rank]; ++i) {
108
+ slice_set<D>(dest, lengths, pdest + dest->stride[rank] * i, rank + 1, v, v_size, v_offset);
109
+ }
110
+ } else {
111
+ for (size_t p = 0; p < lengths[rank]; ++p, ++v_offset) {
112
+ if (v_offset >= v_size) v_offset %= v_size;
113
+
114
+ D* elem = reinterpret_cast<D*>(dest->elements);
115
+ elem[p + pdest] = v[v_offset];
116
+ }
117
+ }
118
+ }
119
+
120
+
121
+ /*
122
+ * Dense storage set/slice-set function, templated version.
123
+ */
124
+ template <typename D>
125
+ void set(VALUE left, SLICE* slice, VALUE right) {
126
+ NM_CONSERVATIVE(nm_register_value(&left));
127
+ NM_CONSERVATIVE(nm_register_value(&right));
128
+
129
+ DENSE_STORAGE* s = NM_STORAGE_DENSE(left);
130
+
131
+ std::pair<NMATRIX*,bool> nm_and_free =
132
+ interpret_arg_as_dense_nmatrix(right, s->dtype);
133
+
134
+ // Map the data onto D* v.
135
+ D* v;
136
+ size_t v_size = 1;
137
+
138
+ if (nm_and_free.first) {
139
+ DENSE_STORAGE* t = reinterpret_cast<DENSE_STORAGE*>(nm_and_free.first->storage);
140
+ v = reinterpret_cast<D*>(t->elements);
141
+ v_size = nm_storage_count_max_elements(t);
142
+
143
+ } else if (RB_TYPE_P(right, T_ARRAY)) {
144
+
145
+ v_size = RARRAY_LEN(right);
146
+ v = NM_ALLOC_N(D, v_size);
147
+ if (s->dtype == nm::RUBYOBJ)
148
+ nm_register_values(reinterpret_cast<VALUE*>(v), v_size);
149
+
150
+ for (size_t m = 0; m < v_size; ++m) {
151
+ rubyval_to_cval(rb_ary_entry(right, m), s->dtype, &(v[m]));
152
+ }
153
+
154
+ } else {
155
+ v = reinterpret_cast<D*>(rubyobj_to_cval(right, NM_DTYPE(left)));
156
+ if (s->dtype == nm::RUBYOBJ)
157
+ nm_register_values(reinterpret_cast<VALUE*>(v), v_size);
158
+ }
159
+
160
+ if (slice->single) {
161
+ reinterpret_cast<D*>(s->elements)[nm_dense_storage_pos(s, slice->coords)] = *v;
162
+ } else {
163
+ size_t v_offset = 0;
164
+ slice_set(s, slice->lengths, nm_dense_storage_pos(s, slice->coords), 0, v, v_size, v_offset);
165
+ }
166
+
167
+ // Only free v if it was allocated in this function.
168
+ if (nm_and_free.first) {
169
+ if (nm_and_free.second) {
170
+ nm_delete(nm_and_free.first);
171
+ }
172
+ } else {
173
+ if (s->dtype == nm::RUBYOBJ)
174
+ nm_unregister_values(reinterpret_cast<VALUE*>(v), v_size);
175
+ NM_FREE(v);
176
+ }
177
+ NM_CONSERVATIVE(nm_unregister_value(&left));
178
+ NM_CONSERVATIVE(nm_unregister_value(&right));
179
+
180
+ }
181
+
182
+ }} // end of namespace nm::dense_storage
183
+
184
+
185
+ extern "C" {
186
+
187
+ static size_t* stride(size_t* shape, size_t dim);
188
+ static void slice_copy(DENSE_STORAGE *dest, const DENSE_STORAGE *src, size_t* lengths, size_t pdest, size_t psrc, size_t n);
189
+
190
+ /*
191
+ * Functions
192
+ */
193
+
194
+ ///////////////
195
+ // Lifecycle //
196
+ ///////////////
197
+
198
+
199
+ /*
200
+ * This creates a dummy with all the properties of dense storage, but no actual elements allocation.
201
+ *
202
+ * elements will be NULL when this function finishes. You can clean up with nm_dense_storage_delete, which will
203
+ * check for that NULL pointer before freeing elements.
204
+ */
205
+ static DENSE_STORAGE* nm_dense_storage_create_dummy(nm::dtype_t dtype, size_t* shape, size_t dim) {
206
+ DENSE_STORAGE* s = NM_ALLOC( DENSE_STORAGE );
207
+
208
+ s->dim = dim;
209
+ s->shape = shape;
210
+ s->dtype = dtype;
211
+
212
+ s->offset = NM_ALLOC_N(size_t, dim);
213
+ memset(s->offset, 0, sizeof(size_t)*dim);
214
+
215
+ s->stride = stride(shape, dim);
216
+ s->count = 1;
217
+ s->src = s;
218
+
219
+ s->elements = NULL;
220
+
221
+ return s;
222
+ }
223
+
224
+
225
+ /*
226
+ * Note that elements and elements_length are for initial value(s) passed in.
227
+ * If they are the correct length, they will be used directly. If not, they
228
+ * will be concatenated over and over again into a new elements array. If
229
+ * elements is NULL, the new elements array will not be initialized.
230
+ */
231
+ DENSE_STORAGE* nm_dense_storage_create(nm::dtype_t dtype, size_t* shape, size_t dim, void* elements, size_t elements_length) {
232
+ if (dtype == nm::RUBYOBJ)
233
+ nm_register_values(reinterpret_cast<VALUE*>(elements), elements_length);
234
+
235
+ DENSE_STORAGE* s = nm_dense_storage_create_dummy(dtype, shape, dim);
236
+ size_t count = nm_storage_count_max_elements(s);
237
+
238
+ if (elements_length == count) {
239
+ s->elements = elements;
240
+
241
+ if (dtype == nm::RUBYOBJ)
242
+ nm_unregister_values(reinterpret_cast<VALUE*>(elements), elements_length);
243
+
244
+ } else {
245
+
246
+ s->elements = NM_ALLOC_N(char, DTYPE_SIZES[dtype]*count);
247
+
248
+ if (dtype == nm::RUBYOBJ)
249
+ nm_unregister_values(reinterpret_cast<VALUE*>(elements), elements_length);
250
+
251
+ size_t copy_length = elements_length;
252
+
253
+ if (elements_length > 0) {
254
+ // Repeat elements over and over again until the end of the matrix.
255
+ for (size_t i = 0; i < count; i += elements_length) {
256
+
257
+ if (i + elements_length > count) {
258
+ copy_length = count - i;
259
+ }
260
+
261
+ memcpy((char*)(s->elements)+i*DTYPE_SIZES[dtype], (char*)(elements)+(i % elements_length)*DTYPE_SIZES[dtype], copy_length*DTYPE_SIZES[dtype]);
262
+ }
263
+
264
+ // Get rid of the init_val.
265
+ NM_FREE(elements);
266
+ }
267
+ }
268
+
269
+ return s;
270
+ }
271
+
272
+
273
+ /*
274
+ * Destructor for dense storage. Make sure when you update this you also update nm_dense_storage_delete_dummy.
275
+ */
276
+ void nm_dense_storage_delete(STORAGE* s) {
277
+ // Sometimes Ruby passes in NULL storage for some reason (probably on copy construction failure).
278
+ if (s) {
279
+ DENSE_STORAGE* storage = (DENSE_STORAGE*)s;
280
+ if(storage->count-- == 1) {
281
+ NM_FREE(storage->shape);
282
+ NM_FREE(storage->offset);
283
+ NM_FREE(storage->stride);
284
+ if (storage->elements != NULL) {// happens with dummy objects
285
+ NM_FREE(storage->elements);
286
+ }
287
+ NM_FREE(storage);
288
+ }
289
+ }
290
+ }
291
+
292
+ /*
293
+ * Destructor for dense storage references (slicing).
294
+ */
295
+ void nm_dense_storage_delete_ref(STORAGE* s) {
296
+ // Sometimes Ruby passes in NULL storage for some reason (probably on copy construction failure).
297
+ if (s) {
298
+ DENSE_STORAGE* storage = (DENSE_STORAGE*)s;
299
+ nm_dense_storage_delete( reinterpret_cast<STORAGE*>(storage->src) );
300
+ NM_FREE(storage->shape);
301
+ NM_FREE(storage->offset);
302
+ NM_FREE(storage);
303
+ }
304
+ }
305
+
306
+ /*
307
+ * Mark values in a dense matrix for garbage collection. This may not be necessary -- further testing required.
308
+ */
309
+ void nm_dense_storage_mark(STORAGE* storage_base) {
310
+
311
+ DENSE_STORAGE* storage = (DENSE_STORAGE*)storage_base;
312
+
313
+ if (storage && storage->dtype == nm::RUBYOBJ) {
314
+ VALUE* els = reinterpret_cast<VALUE*>(storage->elements);
315
+
316
+ if (els) {
317
+ rb_gc_mark_locations(els, &(els[nm_storage_count_max_elements(storage)-1]));
318
+ }
319
+ //for (size_t index = nm_storage_count_max_elements(storage); index-- > 0;) {
320
+ // rb_gc_mark(els[index]);
321
+ //}
322
+ }
323
+ }
324
+
325
+ /**
326
+ * Register a dense storage struct as in-use to avoid garbage collection of the
327
+ * elements stored.
328
+ *
329
+ * This function will check dtype and ignore non-object dtype, so its safe to pass any dense storage in.
330
+ *
331
+ */
332
+ void nm_dense_storage_register(const STORAGE* s) {
333
+ const DENSE_STORAGE* storage = reinterpret_cast<const DENSE_STORAGE*>(s);
334
+ if (storage->dtype == nm::RUBYOBJ && storage->elements) {
335
+ nm_register_values(reinterpret_cast<VALUE*>(storage->elements), nm_storage_count_max_elements(storage));
336
+ }
337
+ }
338
+
339
+ /**
340
+ * Unregister a dense storage struct to allow normal garbage collection of the
341
+ * elements stored.
342
+ *
343
+ * This function will check dtype and ignore non-object dtype, so its safe to pass any dense storage in.
344
+ *
345
+ */
346
+ void nm_dense_storage_unregister(const STORAGE* s) {
347
+ const DENSE_STORAGE* storage = reinterpret_cast<const DENSE_STORAGE*>(s);
348
+ if (storage->dtype == nm::RUBYOBJ && storage->elements) {
349
+ nm_unregister_values(reinterpret_cast<VALUE*>(storage->elements), nm_storage_count_max_elements(storage));
350
+ }
351
+ }
352
+
353
+ ///////////////
354
+ // Accessors //
355
+ ///////////////
356
+
357
+
358
+
359
+ /*
360
+ * map_pair iterator for dense matrices (for element-wise operations)
361
+ */
362
+ VALUE nm_dense_map_pair(VALUE self, VALUE right) {
363
+
364
+ NM_CONSERVATIVE(nm_register_value(&self));
365
+ NM_CONSERVATIVE(nm_register_value(&right));
366
+
367
+ RETURN_SIZED_ENUMERATOR_PRE
368
+ NM_CONSERVATIVE(nm_unregister_value(&right));
369
+ NM_CONSERVATIVE(nm_unregister_value(&self));
370
+ RETURN_SIZED_ENUMERATOR(self, 0, 0, nm_enumerator_length);
371
+
372
+ DENSE_STORAGE *s = NM_STORAGE_DENSE(self),
373
+ *t = NM_STORAGE_DENSE(right);
374
+
375
+ size_t* coords = NM_ALLOCA_N(size_t, s->dim);
376
+ memset(coords, 0, sizeof(size_t) * s->dim);
377
+
378
+ size_t *shape_copy = NM_ALLOC_N(size_t, s->dim);
379
+ memcpy(shape_copy, s->shape, sizeof(size_t) * s->dim);
380
+
381
+ size_t count = nm_storage_count_max_elements(s);
382
+
383
+ DENSE_STORAGE* result = nm_dense_storage_create(nm::RUBYOBJ, shape_copy, s->dim, NULL, 0);
384
+
385
+ VALUE* result_elem = reinterpret_cast<VALUE*>(result->elements);
386
+ nm_dense_storage_register(result);
387
+
388
+ for (size_t k = 0; k < count; ++k) {
389
+ nm_dense_storage_coords(result, k, coords);
390
+ size_t s_index = nm_dense_storage_pos(s, coords),
391
+ t_index = nm_dense_storage_pos(t, coords);
392
+
393
+ VALUE sval = NM_DTYPE(self) == nm::RUBYOBJ ? reinterpret_cast<VALUE*>(s->elements)[s_index] : nm::rubyobj_from_cval((char*)(s->elements) + s_index*DTYPE_SIZES[NM_DTYPE(self)], NM_DTYPE(self)).rval;
394
+ nm_register_value(&sval);
395
+ VALUE tval = NM_DTYPE(right) == nm::RUBYOBJ ? reinterpret_cast<VALUE*>(t->elements)[t_index] : nm::rubyobj_from_cval((char*)(t->elements) + t_index*DTYPE_SIZES[NM_DTYPE(right)], NM_DTYPE(right)).rval;
396
+ result_elem[k] = rb_yield_values(2, sval, tval);
397
+ nm_unregister_value(&sval);
398
+ }
399
+
400
+ VALUE klass = CLASS_OF(self);
401
+ NMATRIX* m = nm_create(nm::DENSE_STORE, reinterpret_cast<STORAGE*>(result));
402
+ nm_register_nmatrix(m);
403
+ VALUE to_return = Data_Wrap_Struct(klass, nm_mark, nm_delete, m);
404
+
405
+ nm_unregister_nmatrix(m);
406
+ nm_dense_storage_unregister(result);
407
+ NM_CONSERVATIVE(nm_unregister_value(&self));
408
+ NM_CONSERVATIVE(nm_unregister_value(&right));
409
+
410
+ return to_return;
411
+
412
+ }
413
+
414
+ /*
415
+ * map enumerator for dense matrices.
416
+ */
417
+ VALUE nm_dense_map(VALUE self) {
418
+
419
+ NM_CONSERVATIVE(nm_register_value(&self));
420
+
421
+ RETURN_SIZED_ENUMERATOR_PRE
422
+ NM_CONSERVATIVE(nm_unregister_value(&self));
423
+ RETURN_SIZED_ENUMERATOR(self, 0, 0, nm_enumerator_length);
424
+
425
+ DENSE_STORAGE *s = NM_STORAGE_DENSE(self);
426
+
427
+ size_t* coords = NM_ALLOCA_N(size_t, s->dim);
428
+ memset(coords, 0, sizeof(size_t) * s->dim);
429
+
430
+ size_t *shape_copy = NM_ALLOC_N(size_t, s->dim);
431
+ memcpy(shape_copy, s->shape, sizeof(size_t) * s->dim);
432
+
433
+ size_t count = nm_storage_count_max_elements(s);
434
+
435
+ DENSE_STORAGE* result = nm_dense_storage_create(nm::RUBYOBJ, shape_copy, s->dim, NULL, 0);
436
+
437
+ VALUE* result_elem = reinterpret_cast<VALUE*>(result->elements);
438
+
439
+ nm_dense_storage_register(result);
440
+
441
+ for (size_t k = 0; k < count; ++k) {
442
+ nm_dense_storage_coords(result, k, coords);
443
+ size_t s_index = nm_dense_storage_pos(s, coords);
444
+
445
+ result_elem[k] = rb_yield(NM_DTYPE(self) == nm::RUBYOBJ ? reinterpret_cast<VALUE*>(s->elements)[s_index] : nm::rubyobj_from_cval((char*)(s->elements) + s_index*DTYPE_SIZES[NM_DTYPE(self)], NM_DTYPE(self)).rval);
446
+ }
447
+
448
+ VALUE klass = CLASS_OF(self);
449
+
450
+ NMATRIX* m = nm_create(nm::DENSE_STORE, reinterpret_cast<STORAGE*>(result));
451
+ nm_register_nmatrix(m);
452
+
453
+ VALUE to_return = Data_Wrap_Struct(klass, nm_mark, nm_delete, m);
454
+
455
+ nm_unregister_nmatrix(m);
456
+ nm_dense_storage_unregister(result);
457
+ NM_CONSERVATIVE(nm_unregister_value(&self));
458
+
459
+ return to_return;
460
+ }
461
+
462
+
463
+ /*
464
+ * each_with_indices iterator for dense matrices.
465
+ */
466
+ VALUE nm_dense_each_with_indices(VALUE nmatrix) {
467
+
468
+ NM_CONSERVATIVE(nm_register_value(&nmatrix));
469
+
470
+ RETURN_SIZED_ENUMERATOR_PRE
471
+ NM_CONSERVATIVE(nm_unregister_value(&nmatrix));
472
+ RETURN_SIZED_ENUMERATOR(nmatrix, 0, 0, nm_enumerator_length); // fourth argument only used by Ruby2+
473
+ DENSE_STORAGE* s = NM_STORAGE_DENSE(nmatrix);
474
+
475
+ // Create indices and initialize them to zero
476
+ size_t* coords = NM_ALLOCA_N(size_t, s->dim);
477
+ memset(coords, 0, sizeof(size_t) * s->dim);
478
+
479
+ size_t slice_index;
480
+ size_t* shape_copy = NM_ALLOC_N(size_t, s->dim);
481
+ memcpy(shape_copy, s->shape, sizeof(size_t) * s->dim);
482
+
483
+ DENSE_STORAGE* sliced_dummy = nm_dense_storage_create_dummy(s->dtype, shape_copy, s->dim);
484
+
485
+ for (size_t k = 0; k < nm_storage_count_max_elements(s); ++k) {
486
+ nm_dense_storage_coords(sliced_dummy, k, coords);
487
+ slice_index = nm_dense_storage_pos(s, coords);
488
+ VALUE ary = rb_ary_new();
489
+ nm_register_value(&ary);
490
+ if (NM_DTYPE(nmatrix) == nm::RUBYOBJ) rb_ary_push(ary, reinterpret_cast<VALUE*>(s->elements)[slice_index]);
491
+ else rb_ary_push(ary, nm::rubyobj_from_cval((char*)(s->elements) + slice_index*DTYPE_SIZES[NM_DTYPE(nmatrix)], NM_DTYPE(nmatrix)).rval);
492
+
493
+ for (size_t p = 0; p < s->dim; ++p) {
494
+ rb_ary_push(ary, INT2FIX(coords[p]));
495
+ }
496
+
497
+ // yield the array which now consists of the value and the indices
498
+ rb_yield(ary);
499
+ nm_unregister_value(&ary);
500
+ }
501
+
502
+ nm_dense_storage_delete(sliced_dummy);
503
+
504
+ NM_CONSERVATIVE(nm_unregister_value(&nmatrix));
505
+
506
+ return nmatrix;
507
+
508
+ }
509
+
510
+
511
+ /*
512
+ * Borrowed this function from NArray. Handles 'each' iteration on a dense
513
+ * matrix.
514
+ *
515
+ * Additionally, handles separately matrices containing VALUEs and matrices
516
+ * containing other types of data.
517
+ */
518
+ VALUE nm_dense_each(VALUE nmatrix) {
519
+
520
+ NM_CONSERVATIVE(nm_register_value(&nmatrix));
521
+
522
+ RETURN_SIZED_ENUMERATOR_PRE
523
+ NM_CONSERVATIVE(nm_unregister_value(&nmatrix));
524
+ RETURN_SIZED_ENUMERATOR(nmatrix, 0, 0, nm_enumerator_length);
525
+
526
+ DENSE_STORAGE* s = NM_STORAGE_DENSE(nmatrix);
527
+
528
+ size_t* temp_coords = NM_ALLOCA_N(size_t, s->dim);
529
+ size_t sliced_index;
530
+ size_t* shape_copy = NM_ALLOC_N(size_t, s->dim);
531
+ memcpy(shape_copy, s->shape, sizeof(size_t) * s->dim);
532
+ DENSE_STORAGE* sliced_dummy = nm_dense_storage_create_dummy(s->dtype, shape_copy, s->dim);
533
+
534
+ if (NM_DTYPE(nmatrix) == nm::RUBYOBJ) {
535
+
536
+ // matrix of Ruby objects -- yield those objects directly
537
+ for (size_t i = 0; i < nm_storage_count_max_elements(s); ++i) {
538
+ nm_dense_storage_coords(sliced_dummy, i, temp_coords);
539
+ sliced_index = nm_dense_storage_pos(s, temp_coords);
540
+ rb_yield( reinterpret_cast<VALUE*>(s->elements)[sliced_index] );
541
+ }
542
+
543
+ } else {
544
+
545
+ // We're going to copy the matrix element into a Ruby VALUE and then operate on it. This way user can't accidentally
546
+ // modify it and cause a seg fault.
547
+ for (size_t i = 0; i < nm_storage_count_max_elements(s); ++i) {
548
+ nm_dense_storage_coords(sliced_dummy, i, temp_coords);
549
+ sliced_index = nm_dense_storage_pos(s, temp_coords);
550
+ VALUE v = nm::rubyobj_from_cval((char*)(s->elements) + sliced_index*DTYPE_SIZES[NM_DTYPE(nmatrix)], NM_DTYPE(nmatrix)).rval;
551
+ rb_yield( v ); // yield to the copy we made
552
+ }
553
+ }
554
+
555
+ nm_dense_storage_delete(sliced_dummy);
556
+ NM_CONSERVATIVE(nm_unregister_value(&nmatrix));
557
+
558
+ return nmatrix;
559
+
560
+ }
561
+
562
+
563
+ /*
564
+ * Non-templated version of nm::dense_storage::slice_copy
565
+ */
566
+ static void slice_copy(DENSE_STORAGE *dest, const DENSE_STORAGE *src, size_t* lengths, size_t pdest, size_t psrc, size_t n) {
567
+ NAMED_LR_DTYPE_TEMPLATE_TABLE(slice_copy_table, nm::dense_storage::slice_copy, void, DENSE_STORAGE*, const DENSE_STORAGE*, size_t*, size_t, size_t, size_t)
568
+
569
+ slice_copy_table[dest->dtype][src->dtype](dest, src, lengths, pdest, psrc, n);
570
+ }
571
+
572
+
573
+ /*
574
+ * Get a slice or one element, using copying.
575
+ *
576
+ * FIXME: Template the first condition.
577
+ */
578
+ void* nm_dense_storage_get(const STORAGE* storage, SLICE* slice) {
579
+ DENSE_STORAGE* s = (DENSE_STORAGE*)storage;
580
+ if (slice->single)
581
+ return (char*)(s->elements) + nm_dense_storage_pos(s, slice->coords) * DTYPE_SIZES[s->dtype];
582
+ else {
583
+ nm_dense_storage_register(s);
584
+ size_t *shape = NM_ALLOC_N(size_t, s->dim);
585
+ for (size_t i = 0; i < s->dim; ++i) {
586
+ shape[i] = slice->lengths[i];
587
+ }
588
+
589
+ DENSE_STORAGE* ns = nm_dense_storage_create(s->dtype, shape, s->dim, NULL, 0);
590
+
591
+ slice_copy(ns,
592
+ reinterpret_cast<const DENSE_STORAGE*>(s->src),
593
+ slice->lengths,
594
+ 0,
595
+ nm_dense_storage_pos(s, slice->coords),
596
+ 0);
597
+
598
+ nm_dense_storage_unregister(s);
599
+ return ns;
600
+ }
601
+ }
602
+
603
+ /*
604
+ * Get a slice or one element by reference (no copy).
605
+ *
606
+ * FIXME: Template the first condition.
607
+ */
608
+ void* nm_dense_storage_ref(const STORAGE* storage, SLICE* slice) {
609
+ DENSE_STORAGE* s = (DENSE_STORAGE*)storage;
610
+
611
+ if (slice->single)
612
+ return (char*)(s->elements) + nm_dense_storage_pos(s, slice->coords) * DTYPE_SIZES[s->dtype];
613
+
614
+ else {
615
+ nm_dense_storage_register(s);
616
+ DENSE_STORAGE* ns = NM_ALLOC( DENSE_STORAGE );
617
+ ns->dim = s->dim;
618
+ ns->dtype = s->dtype;
619
+ ns->offset = NM_ALLOC_N(size_t, ns->dim);
620
+ ns->shape = NM_ALLOC_N(size_t, ns->dim);
621
+
622
+ for (size_t i = 0; i < ns->dim; ++i) {
623
+ ns->offset[i] = slice->coords[i] + s->offset[i];
624
+ ns->shape[i] = slice->lengths[i];
625
+ }
626
+
627
+ ns->stride = s->stride;
628
+ ns->elements = s->elements;
629
+
630
+ s->src->count++;
631
+ ns->src = s->src;
632
+
633
+ nm_dense_storage_unregister(s);
634
+ return ns;
635
+ }
636
+ }
637
+
638
+
639
+
640
+
641
+ /*
642
+ * Set a value or values in a dense matrix. Requires that right be either a single value or an NMatrix (ref or real).
643
+ */
644
+ void nm_dense_storage_set(VALUE left, SLICE* slice, VALUE right) {
645
+ NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::dense_storage::set, void, VALUE, SLICE*, VALUE)
646
+ nm::dtype_t dtype = NM_DTYPE(left);
647
+ ttable[dtype](left, slice, right);
648
+ }
649
+
650
+
651
+ ///////////
652
+ // Tests //
653
+ ///////////
654
+
655
+ /*
656
+ * Do these two dense matrices have the same contents?
657
+ *
658
+ * TODO: Test the shape of the two matrices.
659
+ * TODO: See if using memcmp is faster when the left- and right-hand matrices
660
+ * have the same dtype.
661
+ */
662
+ bool nm_dense_storage_eqeq(const STORAGE* left, const STORAGE* right) {
663
+ LR_DTYPE_TEMPLATE_TABLE(nm::dense_storage::eqeq, bool, const DENSE_STORAGE*, const DENSE_STORAGE*)
664
+
665
+ if (!ttable[left->dtype][right->dtype]) {
666
+ rb_raise(nm_eDataTypeError, "comparison between these dtypes is undefined");
667
+ return false;
668
+ }
669
+
670
+ return ttable[left->dtype][right->dtype]((const DENSE_STORAGE*)left, (const DENSE_STORAGE*)right);
671
+ }
672
+
673
+ /*
674
+ * Test to see if the matrix is Hermitian. If the matrix does not have a
675
+ * dtype of Complex64 or Complex128 this is the same as testing for symmetry.
676
+ */
677
+ bool nm_dense_storage_is_hermitian(const DENSE_STORAGE* mat, int lda) {
678
+ if (mat->dtype == nm::COMPLEX64) {
679
+ return nm::dense_storage::is_hermitian<nm::Complex64>(mat, lda);
680
+
681
+ } else if (mat->dtype == nm::COMPLEX128) {
682
+ return nm::dense_storage::is_hermitian<nm::Complex128>(mat, lda);
683
+
684
+ } else {
685
+ return nm_dense_storage_is_symmetric(mat, lda);
686
+ }
687
+ }
688
+
689
+ /*
690
+ * Is this dense matrix symmetric about the diagonal?
691
+ */
692
+ bool nm_dense_storage_is_symmetric(const DENSE_STORAGE* mat, int lda) {
693
+ DTYPE_TEMPLATE_TABLE(nm::dense_storage::is_symmetric, bool, const DENSE_STORAGE*, int);
694
+
695
+ return ttable[mat->dtype](mat, lda);
696
+ }
697
+
698
+ //////////
699
+ // Math //
700
+ //////////
701
+
702
+
703
+ /*
704
+ * Dense matrix-matrix multiplication.
705
+ */
706
+ STORAGE* nm_dense_storage_matrix_multiply(const STORAGE_PAIR& casted_storage, size_t* resulting_shape, bool vector) {
707
+ DTYPE_TEMPLATE_TABLE(nm::dense_storage::matrix_multiply, DENSE_STORAGE*, const STORAGE_PAIR& casted_storage, size_t* resulting_shape, bool vector);
708
+
709
+ return ttable[casted_storage.left->dtype](casted_storage, resulting_shape, vector);
710
+ }
711
+
712
+ /////////////
713
+ // Utility //
714
+ /////////////
715
+
716
+ /*
717
+ * Determine the linear array position (in elements of s) of some set of coordinates
718
+ * (given by slice).
719
+ */
720
+ size_t nm_dense_storage_pos(const DENSE_STORAGE* s, const size_t* coords) {
721
+ size_t pos = 0;
722
+
723
+ for (size_t i = 0; i < s->dim; ++i)
724
+ pos += (coords[i] + s->offset[i]) * s->stride[i];
725
+
726
+ return pos;
727
+
728
+ }
729
+
730
+ /*
731
+ * Determine the a set of slice coordinates from linear array position (in elements
732
+ * of s) of some set of coordinates (given by slice). (Inverse of
733
+ * nm_dense_storage_pos).
734
+ *
735
+ * The parameter coords_out should be a pre-allocated array of size equal to s->dim.
736
+ */
737
+ void nm_dense_storage_coords(const DENSE_STORAGE* s, const size_t slice_pos, size_t* coords_out) {
738
+
739
+ size_t temp_pos = slice_pos;
740
+
741
+ for (size_t i = 0; i < s->dim; ++i) {
742
+ coords_out[i] = (temp_pos - temp_pos % s->stride[i])/s->stride[i] - s->offset[i];
743
+ temp_pos = temp_pos % s->stride[i];
744
+ }
745
+ }
746
+
747
+ /*
748
+ * Calculate the stride length.
749
+ */
750
+ static size_t* stride(size_t* shape, size_t dim) {
751
+ size_t i, j;
752
+ size_t* stride = NM_ALLOC_N(size_t, dim);
753
+
754
+ for (i = 0; i < dim; ++i) {
755
+ stride[i] = 1;
756
+ for (j = i+1; j < dim; ++j) {
757
+ stride[i] *= shape[j];
758
+ }
759
+ }
760
+
761
+ return stride;
762
+ }
763
+
764
+
765
+ /////////////////////////
766
+ // Copying and Casting //
767
+ /////////////////////////
768
+
769
+ /*
770
+ * Copy dense storage, changing dtype if necessary.
771
+ */
772
+ STORAGE* nm_dense_storage_cast_copy(const STORAGE* rhs, nm::dtype_t new_dtype, void* dummy) {
773
+ NAMED_LR_DTYPE_TEMPLATE_TABLE(ttable, nm::dense_storage::cast_copy, DENSE_STORAGE*, const DENSE_STORAGE* rhs, nm::dtype_t new_dtype);
774
+
775
+ if (!ttable[new_dtype][rhs->dtype]) {
776
+ rb_raise(nm_eDataTypeError, "cast between these dtypes is undefined");
777
+ return NULL;
778
+ }
779
+
780
+ return (STORAGE*)ttable[new_dtype][rhs->dtype]((DENSE_STORAGE*)rhs, new_dtype);
781
+ }
782
+
783
+ /*
784
+ * Copy dense storage without a change in dtype.
785
+ */
786
+ DENSE_STORAGE* nm_dense_storage_copy(const DENSE_STORAGE* rhs) {
787
+ nm_dense_storage_register(rhs);
788
+
789
+ size_t count = 0;
790
+ size_t *shape = NM_ALLOC_N(size_t, rhs->dim);
791
+
792
+ // copy shape and offset
793
+ for (size_t i = 0; i < rhs->dim; ++i) {
794
+ shape[i] = rhs->shape[i];
795
+ }
796
+
797
+ DENSE_STORAGE* lhs = nm_dense_storage_create(rhs->dtype, shape, rhs->dim, NULL, 0);
798
+ count = nm_storage_count_max_elements(lhs);
799
+
800
+
801
+ // Ensure that allocation worked before copying.
802
+ if (lhs && count) {
803
+ if (rhs == rhs->src) // not a reference
804
+ memcpy(lhs->elements, rhs->elements, DTYPE_SIZES[rhs->dtype] * count);
805
+ else { // slice whole matrix
806
+ nm_dense_storage_register(lhs);
807
+ size_t *offset = NM_ALLOC_N(size_t, rhs->dim);
808
+ memset(offset, 0, sizeof(size_t) * rhs->dim);
809
+
810
+ slice_copy(lhs,
811
+ reinterpret_cast<const DENSE_STORAGE*>(rhs->src),
812
+ rhs->shape,
813
+ 0,
814
+ nm_dense_storage_pos(rhs, offset),
815
+ 0);
816
+
817
+ nm_dense_storage_unregister(lhs);
818
+ }
819
+ }
820
+
821
+ nm_dense_storage_unregister(rhs);
822
+
823
+ return lhs;
824
+ }
825
+
826
+
827
+ /*
828
+ * Transpose dense storage into a new dense storage object. Basically a copy constructor.
829
+ *
830
+ * Not much point in templating this as it's pretty straight-forward.
831
+ */
832
+ STORAGE* nm_dense_storage_copy_transposed(const STORAGE* rhs_base) {
833
+ DENSE_STORAGE* rhs = (DENSE_STORAGE*)rhs_base;
834
+
835
+ nm_dense_storage_register(rhs);
836
+ size_t *shape = NM_ALLOC_N(size_t, rhs->dim);
837
+
838
+ // swap shape
839
+ shape[0] = rhs->shape[1];
840
+ shape[1] = rhs->shape[0];
841
+
842
+ DENSE_STORAGE *lhs = nm_dense_storage_create(rhs->dtype, shape, rhs->dim, NULL, 0);
843
+
844
+ nm_dense_storage_register(lhs);
845
+
846
+ if (rhs_base->src == rhs_base) {
847
+ nm_math_transpose_generic(rhs->shape[0], rhs->shape[1], rhs->elements, rhs->shape[1], lhs->elements, lhs->shape[1], DTYPE_SIZES[rhs->dtype]);
848
+ } else {
849
+ NAMED_LR_DTYPE_TEMPLATE_TABLE(ttable, nm::dense_storage::ref_slice_copy_transposed, void, const DENSE_STORAGE* rhs, DENSE_STORAGE* lhs);
850
+
851
+ if (!ttable[lhs->dtype][rhs->dtype]) {
852
+ nm_dense_storage_unregister(rhs);
853
+ nm_dense_storage_unregister(lhs);
854
+ rb_raise(nm_eDataTypeError, "transposition between these dtypes is undefined");
855
+ }
856
+
857
+ ttable[lhs->dtype][rhs->dtype](rhs, lhs);
858
+ }
859
+
860
+ nm_dense_storage_unregister(rhs);
861
+ nm_dense_storage_unregister(lhs);
862
+
863
+ return (STORAGE*)lhs;
864
+ }
865
+
866
+ } // end of extern "C" block
867
+
868
+ namespace nm {
869
+
870
+ /*
871
+ * Used for slice setting. Takes the right-hand of the equal sign, a single VALUE, and massages
872
+ * it into the correct form if it's not already there (dtype, non-ref, dense). Returns a pair of the NMATRIX* and a
873
+ * boolean. If the boolean is true, the calling function is responsible for calling nm_delete on the NMATRIX*.
874
+ * Otherwise, the NMATRIX* still belongs to Ruby and Ruby will free it.
875
+ */
876
+ std::pair<NMATRIX*,bool> interpret_arg_as_dense_nmatrix(VALUE right, nm::dtype_t dtype) {
877
+ NM_CONSERVATIVE(nm_register_value(&right));
878
+ if (IsNMatrixType(right)) {
879
+ NMATRIX *r;
880
+ if (NM_STYPE(right) != DENSE_STORE || NM_DTYPE(right) != dtype || NM_SRC(right) != NM_STORAGE(right)) {
881
+ UnwrapNMatrix( right, r );
882
+ NMATRIX* ldtype_r = nm_cast_with_ctype_args(r, nm::DENSE_STORE, dtype, NULL);
883
+ NM_CONSERVATIVE(nm_unregister_value(&right));
884
+ return std::make_pair(ldtype_r,true);
885
+ } else { // simple case -- right-hand matrix is dense and is not a reference and has same dtype
886
+ UnwrapNMatrix( right, r );
887
+ NM_CONSERVATIVE(nm_unregister_value(&right));
888
+ return std::make_pair(r, false);
889
+ }
890
+ // Do not set v_alloc = true for either of these. It is the responsibility of r/ldtype_r
891
+ } else if (RB_TYPE_P(right, T_DATA)) {
892
+ NM_CONSERVATIVE(nm_unregister_value(&right));
893
+ rb_raise(rb_eTypeError, "unrecognized type for slice assignment");
894
+ }
895
+
896
+ NM_CONSERVATIVE(nm_unregister_value(&right));
897
+ return std::make_pair<NMATRIX*,bool>(NULL, false);
898
+ }
899
+
900
+
901
+ namespace dense_storage {
902
+
903
+ /////////////////////////
904
+ // Templated Functions //
905
+ /////////////////////////
906
+
907
+ template<typename LDType, typename RDType>
908
+ void ref_slice_copy_transposed(const DENSE_STORAGE* rhs, DENSE_STORAGE* lhs) {
909
+
910
+ nm_dense_storage_register(rhs);
911
+ nm_dense_storage_register(lhs);
912
+
913
+ LDType* lhs_els = reinterpret_cast<LDType*>(lhs->elements);
914
+ RDType* rhs_els = reinterpret_cast<RDType*>(rhs->elements);
915
+ size_t count = nm_storage_count_max_elements(lhs);;
916
+ size_t* temp_coords = NM_ALLOCA_N(size_t, lhs->dim);
917
+ size_t coord_swap_temp;
918
+
919
+ while (count-- > 0) {
920
+ nm_dense_storage_coords(lhs, count, temp_coords);
921
+ NM_SWAP(temp_coords[0], temp_coords[1], coord_swap_temp);
922
+ size_t r_coord = nm_dense_storage_pos(rhs, temp_coords);
923
+ lhs_els[count] = rhs_els[r_coord];
924
+ }
925
+
926
+ nm_dense_storage_unregister(rhs);
927
+ nm_dense_storage_unregister(lhs);
928
+
929
+ }
930
+
931
+ template <typename LDType, typename RDType>
932
+ DENSE_STORAGE* cast_copy(const DENSE_STORAGE* rhs, dtype_t new_dtype) {
933
+ nm_dense_storage_register(rhs);
934
+
935
+ size_t count = nm_storage_count_max_elements(rhs);
936
+
937
+ size_t *shape = NM_ALLOC_N(size_t, rhs->dim);
938
+ memcpy(shape, rhs->shape, sizeof(size_t) * rhs->dim);
939
+
940
+ DENSE_STORAGE* lhs = nm_dense_storage_create(new_dtype, shape, rhs->dim, NULL, 0);
941
+
942
+ nm_dense_storage_register(lhs);
943
+
944
+ // Ensure that allocation worked before copying.
945
+ if (lhs && count) {
946
+ if (rhs->src != rhs) { // Make a copy of a ref to a matrix.
947
+ size_t* offset = NM_ALLOCA_N(size_t, rhs->dim);
948
+ memset(offset, 0, sizeof(size_t) * rhs->dim);
949
+
950
+ slice_copy(lhs, reinterpret_cast<const DENSE_STORAGE*>(rhs->src),
951
+ rhs->shape, 0,
952
+ nm_dense_storage_pos(rhs, offset), 0);
953
+
954
+ } else { // Make a regular copy.
955
+ RDType* rhs_els = reinterpret_cast<RDType*>(rhs->elements);
956
+ LDType* lhs_els = reinterpret_cast<LDType*>(lhs->elements);
957
+
958
+ for (size_t i = 0; i < count; ++i)
959
+ lhs_els[i] = rhs_els[i];
960
+ }
961
+ }
962
+
963
+ nm_dense_storage_unregister(rhs);
964
+ nm_dense_storage_unregister(lhs);
965
+
966
+ return lhs;
967
+ }
968
+
969
+ template <typename LDType, typename RDType>
970
+ bool eqeq(const DENSE_STORAGE* left, const DENSE_STORAGE* right) {
971
+ nm_dense_storage_register(left);
972
+ nm_dense_storage_register(right);
973
+
974
+ size_t index;
975
+ DENSE_STORAGE *tmp1, *tmp2;
976
+ tmp1 = NULL; tmp2 = NULL;
977
+ bool result = true;
978
+ /* FIXME: Very strange behavior! The GC calls the method directly with non-initialized data. */
979
+
980
+ LDType* left_elements = (LDType*)left->elements;
981
+ RDType* right_elements = (RDType*)right->elements;
982
+
983
+ // Copy elements in temp matrix if you have reference to the right.
984
+ if (left->src != left) {
985
+ tmp1 = nm_dense_storage_copy(left);
986
+ nm_dense_storage_register(tmp1);
987
+ left_elements = (LDType*)tmp1->elements;
988
+ }
989
+ if (right->src != right) {
990
+ tmp2 = nm_dense_storage_copy(right);
991
+ nm_dense_storage_register(tmp2);
992
+ right_elements = (RDType*)tmp2->elements;
993
+ }
994
+
995
+
996
+
997
+ for (index = nm_storage_count_max_elements(left); index-- > 0;) {
998
+ if (left_elements[index] != right_elements[index]) {
999
+ result = false;
1000
+ break;
1001
+ }
1002
+ }
1003
+
1004
+ if (tmp1) {
1005
+ nm_dense_storage_unregister(tmp1);
1006
+ NM_FREE(tmp1);
1007
+ }
1008
+ if (tmp2) {
1009
+ nm_dense_storage_unregister(tmp2);
1010
+ NM_FREE(tmp2);
1011
+ }
1012
+
1013
+ nm_dense_storage_unregister(left);
1014
+ nm_dense_storage_unregister(right);
1015
+ return result;
1016
+ }
1017
+
1018
+ template <typename DType>
1019
+ bool is_hermitian(const DENSE_STORAGE* mat, int lda) {
1020
+ unsigned int i, j;
1021
+ DType complex_conj;
1022
+
1023
+ const DType* els = (DType*) mat->elements;
1024
+
1025
+ for (i = mat->shape[0]; i-- > 0;) {
1026
+ for (j = i + 1; j < mat->shape[1]; ++j) {
1027
+ complex_conj = els[j*lda + i];
1028
+ complex_conj.i = -complex_conj.i;
1029
+
1030
+ if (els[i*lda+j] != complex_conj) {
1031
+ return false;
1032
+ }
1033
+ }
1034
+ }
1035
+
1036
+ return true;
1037
+ }
1038
+
1039
+ template <typename DType>
1040
+ bool is_symmetric(const DENSE_STORAGE* mat, int lda) {
1041
+ unsigned int i, j;
1042
+ const DType* els = (DType*) mat->elements;
1043
+
1044
+ for (i = mat->shape[0]; i-- > 0;) {
1045
+ for (j = i + 1; j < mat->shape[1]; ++j) {
1046
+ if (els[i*lda+j] != els[j*lda+i]) {
1047
+ return false;
1048
+ }
1049
+ }
1050
+ }
1051
+
1052
+ return true;
1053
+ }
1054
+
1055
+
1056
+
1057
+ /*
1058
+ * DType-templated matrix-matrix multiplication for dense storage.
1059
+ */
1060
+ template <typename DType>
1061
+ static DENSE_STORAGE* matrix_multiply(const STORAGE_PAIR& casted_storage, size_t* resulting_shape, bool vector) {
1062
+ DENSE_STORAGE *left = (DENSE_STORAGE*)(casted_storage.left),
1063
+ *right = (DENSE_STORAGE*)(casted_storage.right);
1064
+
1065
+ nm_dense_storage_register(left);
1066
+ nm_dense_storage_register(right);
1067
+
1068
+ // Create result storage.
1069
+ DENSE_STORAGE* result = nm_dense_storage_create(left->dtype, resulting_shape, 2, NULL, 0);
1070
+
1071
+ nm_dense_storage_register(result);
1072
+
1073
+ DType *pAlpha = NM_ALLOCA_N(DType, 1),
1074
+ *pBeta = NM_ALLOCA_N(DType, 1);
1075
+
1076
+ *pAlpha = 1;
1077
+ *pBeta = 0;
1078
+ // Do the multiplication
1079
+ if (vector) nm::math::gemv<DType>(CblasNoTrans, left->shape[0], left->shape[1], pAlpha,
1080
+ reinterpret_cast<DType*>(left->elements), left->shape[1],
1081
+ reinterpret_cast<DType*>(right->elements), 1, pBeta,
1082
+ reinterpret_cast<DType*>(result->elements), 1);
1083
+ else nm::math::gemm<DType>(CblasRowMajor, CblasNoTrans, CblasNoTrans, left->shape[0], right->shape[1], left->shape[1],
1084
+ pAlpha, reinterpret_cast<DType*>(left->elements), left->shape[1],
1085
+ reinterpret_cast<DType*>(right->elements), right->shape[1], pBeta,
1086
+ reinterpret_cast<DType*>(result->elements), result->shape[1]);
1087
+
1088
+
1089
+ nm_dense_storage_unregister(left);
1090
+ nm_dense_storage_unregister(right);
1091
+ nm_dense_storage_unregister(result);
1092
+
1093
+ return result;
1094
+ }
1095
+
1096
+ }} // end of namespace nm::dense_storage