pnmatrix 1.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111) hide show
  1. checksums.yaml +7 -0
  2. data/ext/nmatrix/binary_format.txt +53 -0
  3. data/ext/nmatrix/data/complex.h +388 -0
  4. data/ext/nmatrix/data/data.cpp +274 -0
  5. data/ext/nmatrix/data/data.h +651 -0
  6. data/ext/nmatrix/data/meta.h +64 -0
  7. data/ext/nmatrix/data/ruby_object.h +386 -0
  8. data/ext/nmatrix/extconf.rb +70 -0
  9. data/ext/nmatrix/math/asum.h +99 -0
  10. data/ext/nmatrix/math/cblas_enums.h +36 -0
  11. data/ext/nmatrix/math/cblas_templates_core.h +507 -0
  12. data/ext/nmatrix/math/gemm.h +241 -0
  13. data/ext/nmatrix/math/gemv.h +178 -0
  14. data/ext/nmatrix/math/getrf.h +255 -0
  15. data/ext/nmatrix/math/getrs.h +121 -0
  16. data/ext/nmatrix/math/imax.h +82 -0
  17. data/ext/nmatrix/math/laswp.h +165 -0
  18. data/ext/nmatrix/math/long_dtype.h +62 -0
  19. data/ext/nmatrix/math/magnitude.h +54 -0
  20. data/ext/nmatrix/math/math.h +751 -0
  21. data/ext/nmatrix/math/nrm2.h +165 -0
  22. data/ext/nmatrix/math/rot.h +117 -0
  23. data/ext/nmatrix/math/rotg.h +106 -0
  24. data/ext/nmatrix/math/scal.h +71 -0
  25. data/ext/nmatrix/math/trsm.h +336 -0
  26. data/ext/nmatrix/math/util.h +162 -0
  27. data/ext/nmatrix/math.cpp +1368 -0
  28. data/ext/nmatrix/nm_memory.h +60 -0
  29. data/ext/nmatrix/nmatrix.cpp +285 -0
  30. data/ext/nmatrix/nmatrix.h +476 -0
  31. data/ext/nmatrix/ruby_constants.cpp +151 -0
  32. data/ext/nmatrix/ruby_constants.h +106 -0
  33. data/ext/nmatrix/ruby_nmatrix.c +3130 -0
  34. data/ext/nmatrix/storage/common.cpp +77 -0
  35. data/ext/nmatrix/storage/common.h +183 -0
  36. data/ext/nmatrix/storage/dense/dense.cpp +1096 -0
  37. data/ext/nmatrix/storage/dense/dense.h +129 -0
  38. data/ext/nmatrix/storage/list/list.cpp +1628 -0
  39. data/ext/nmatrix/storage/list/list.h +138 -0
  40. data/ext/nmatrix/storage/storage.cpp +730 -0
  41. data/ext/nmatrix/storage/storage.h +99 -0
  42. data/ext/nmatrix/storage/yale/class.h +1139 -0
  43. data/ext/nmatrix/storage/yale/iterators/base.h +143 -0
  44. data/ext/nmatrix/storage/yale/iterators/iterator.h +131 -0
  45. data/ext/nmatrix/storage/yale/iterators/row.h +450 -0
  46. data/ext/nmatrix/storage/yale/iterators/row_stored.h +140 -0
  47. data/ext/nmatrix/storage/yale/iterators/row_stored_nd.h +169 -0
  48. data/ext/nmatrix/storage/yale/iterators/stored_diagonal.h +124 -0
  49. data/ext/nmatrix/storage/yale/math/transpose.h +110 -0
  50. data/ext/nmatrix/storage/yale/yale.cpp +2074 -0
  51. data/ext/nmatrix/storage/yale/yale.h +203 -0
  52. data/ext/nmatrix/types.h +55 -0
  53. data/ext/nmatrix/util/io.cpp +279 -0
  54. data/ext/nmatrix/util/io.h +115 -0
  55. data/ext/nmatrix/util/sl_list.cpp +627 -0
  56. data/ext/nmatrix/util/sl_list.h +144 -0
  57. data/ext/nmatrix/util/util.h +78 -0
  58. data/lib/nmatrix/blas.rb +378 -0
  59. data/lib/nmatrix/cruby/math.rb +744 -0
  60. data/lib/nmatrix/enumerate.rb +253 -0
  61. data/lib/nmatrix/homogeneous.rb +241 -0
  62. data/lib/nmatrix/io/fortran_format.rb +138 -0
  63. data/lib/nmatrix/io/harwell_boeing.rb +221 -0
  64. data/lib/nmatrix/io/market.rb +263 -0
  65. data/lib/nmatrix/io/point_cloud.rb +189 -0
  66. data/lib/nmatrix/jruby/decomposition.rb +24 -0
  67. data/lib/nmatrix/jruby/enumerable.rb +13 -0
  68. data/lib/nmatrix/jruby/error.rb +4 -0
  69. data/lib/nmatrix/jruby/math.rb +501 -0
  70. data/lib/nmatrix/jruby/nmatrix_java.rb +840 -0
  71. data/lib/nmatrix/jruby/operators.rb +283 -0
  72. data/lib/nmatrix/jruby/slice.rb +264 -0
  73. data/lib/nmatrix/lapack_core.rb +181 -0
  74. data/lib/nmatrix/lapack_plugin.rb +44 -0
  75. data/lib/nmatrix/math.rb +953 -0
  76. data/lib/nmatrix/mkmf.rb +100 -0
  77. data/lib/nmatrix/monkeys.rb +137 -0
  78. data/lib/nmatrix/nmatrix.rb +1172 -0
  79. data/lib/nmatrix/rspec.rb +75 -0
  80. data/lib/nmatrix/shortcuts.rb +1163 -0
  81. data/lib/nmatrix/version.rb +39 -0
  82. data/lib/nmatrix/yale_functions.rb +118 -0
  83. data/lib/nmatrix.rb +28 -0
  84. data/spec/00_nmatrix_spec.rb +892 -0
  85. data/spec/01_enum_spec.rb +196 -0
  86. data/spec/02_slice_spec.rb +407 -0
  87. data/spec/03_nmatrix_monkeys_spec.rb +80 -0
  88. data/spec/2x2_dense_double.mat +0 -0
  89. data/spec/4x4_sparse.mat +0 -0
  90. data/spec/4x5_dense.mat +0 -0
  91. data/spec/blas_spec.rb +215 -0
  92. data/spec/elementwise_spec.rb +311 -0
  93. data/spec/homogeneous_spec.rb +100 -0
  94. data/spec/io/fortran_format_spec.rb +88 -0
  95. data/spec/io/harwell_boeing_spec.rb +98 -0
  96. data/spec/io/test.rua +9 -0
  97. data/spec/io_spec.rb +159 -0
  98. data/spec/lapack_core_spec.rb +482 -0
  99. data/spec/leakcheck.rb +16 -0
  100. data/spec/math_spec.rb +1363 -0
  101. data/spec/nmatrix_yale_resize_test_associations.yaml +2802 -0
  102. data/spec/nmatrix_yale_spec.rb +286 -0
  103. data/spec/rspec_monkeys.rb +56 -0
  104. data/spec/rspec_spec.rb +35 -0
  105. data/spec/shortcuts_spec.rb +474 -0
  106. data/spec/slice_set_spec.rb +162 -0
  107. data/spec/spec_helper.rb +172 -0
  108. data/spec/stat_spec.rb +214 -0
  109. data/spec/test.pcd +20 -0
  110. data/spec/utm5940.mtx +83844 -0
  111. metadata +295 -0
@@ -0,0 +1,1096 @@
1
+ /////////////////////////////////////////////////////////////////////
2
+ // = NMatrix
3
+ //
4
+ // A linear algebra library for scientific computation in Ruby.
5
+ // NMatrix is part of SciRuby.
6
+ //
7
+ // NMatrix was originally inspired by and derived from NArray, by
8
+ // Masahiro Tanaka: http://narray.rubyforge.org
9
+ //
10
+ // == Copyright Information
11
+ //
12
+ // SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
13
+ // NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
14
+ //
15
+ // Please see LICENSE.txt for additional copyright notices.
16
+ //
17
+ // == Contributing
18
+ //
19
+ // By contributing source code to SciRuby, you agree to be bound by
20
+ // our Contributor Agreement:
21
+ //
22
+ // * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement
23
+ //
24
+ // == dense.cpp
25
+ //
26
+ // Dense n-dimensional matrix storage.
27
+
28
+ /*
29
+ * Standard Includes
30
+ */
31
+
32
+ #include <ruby.h>
33
+
34
+ /*
35
+ * Project Includes
36
+ */
37
+ #include "../../data/data.h"
38
+ #include "../../math/long_dtype.h"
39
+ #include "../../math/gemm.h"
40
+ #include "../../math/gemv.h"
41
+ #include "../../math/math.h"
42
+ #include "../common.h"
43
+ #include "dense.h"
44
+
45
+ /*
46
+ * Macros
47
+ */
48
+
49
+ /*
50
+ * Global Variables
51
+ */
52
+
53
+ /*
54
+ * Forward Declarations
55
+ */
56
+
57
+ namespace nm { namespace dense_storage {
58
+
59
+ template<typename LDType, typename RDType>
60
+ void ref_slice_copy_transposed(const DENSE_STORAGE* rhs, DENSE_STORAGE* lhs);
61
+
62
+ template <typename LDType, typename RDType>
63
+ DENSE_STORAGE* cast_copy(const DENSE_STORAGE* rhs, nm::dtype_t new_dtype);
64
+
65
+ template <typename LDType, typename RDType>
66
+ bool eqeq(const DENSE_STORAGE* left, const DENSE_STORAGE* right);
67
+
68
+ template <typename DType>
69
+ static DENSE_STORAGE* matrix_multiply(const STORAGE_PAIR& casted_storage, size_t* resulting_shape, bool vector);
70
+
71
+ template <typename DType>
72
+ bool is_hermitian(const DENSE_STORAGE* mat, int lda);
73
+
74
+ template <typename DType>
75
+ bool is_symmetric(const DENSE_STORAGE* mat, int lda);
76
+
77
+
78
+ /*
79
+ * Recursive slicing for N-dimensional matrix.
80
+ */
81
+ template <typename LDType, typename RDType>
82
+ static void slice_copy(DENSE_STORAGE *dest, const DENSE_STORAGE *src, size_t* lengths, size_t pdest, size_t psrc, size_t n) {
83
+ if (src->dim - n > 1) {
84
+ for (size_t i = 0; i < lengths[n]; ++i) {
85
+ slice_copy<LDType,RDType>(dest, src, lengths,
86
+ pdest + dest->stride[n]*i,
87
+ psrc + src->stride[n]*i,
88
+ n + 1);
89
+ }
90
+ } else {
91
+ for (size_t p = 0; p < dest->shape[n]; ++p) {
92
+ reinterpret_cast<LDType*>(dest->elements)[p+pdest] = reinterpret_cast<RDType*>(src->elements)[p+psrc];
93
+ }
94
+ /*memcpy((char*)dest->elements + pdest*DTYPE_SIZES[dest->dtype],
95
+ (char*)src->elements + psrc*DTYPE_SIZES[src->dtype],
96
+ dest->shape[n]*DTYPE_SIZES[dest->dtype]); */
97
+ }
98
+
99
+ }
100
+
101
+ /*
102
+ * Recursive function, sets multiple values in a matrix from a single source value. Same basic pattern as slice_copy.
103
+ */
104
+ template <typename D>
105
+ static void slice_set(DENSE_STORAGE* dest, size_t* lengths, size_t pdest, size_t rank, D* const v, size_t v_size, size_t& v_offset) {
106
+ if (dest->dim - rank > 1) {
107
+ for (size_t i = 0; i < lengths[rank]; ++i) {
108
+ slice_set<D>(dest, lengths, pdest + dest->stride[rank] * i, rank + 1, v, v_size, v_offset);
109
+ }
110
+ } else {
111
+ for (size_t p = 0; p < lengths[rank]; ++p, ++v_offset) {
112
+ if (v_offset >= v_size) v_offset %= v_size;
113
+
114
+ D* elem = reinterpret_cast<D*>(dest->elements);
115
+ elem[p + pdest] = v[v_offset];
116
+ }
117
+ }
118
+ }
119
+
120
+
121
+ /*
122
+ * Dense storage set/slice-set function, templated version.
123
+ */
124
+ template <typename D>
125
+ void set(VALUE left, SLICE* slice, VALUE right) {
126
+ NM_CONSERVATIVE(nm_register_value(&left));
127
+ NM_CONSERVATIVE(nm_register_value(&right));
128
+
129
+ DENSE_STORAGE* s = NM_STORAGE_DENSE(left);
130
+
131
+ std::pair<NMATRIX*,bool> nm_and_free =
132
+ interpret_arg_as_dense_nmatrix(right, s->dtype);
133
+
134
+ // Map the data onto D* v.
135
+ D* v;
136
+ size_t v_size = 1;
137
+
138
+ if (nm_and_free.first) {
139
+ DENSE_STORAGE* t = reinterpret_cast<DENSE_STORAGE*>(nm_and_free.first->storage);
140
+ v = reinterpret_cast<D*>(t->elements);
141
+ v_size = nm_storage_count_max_elements(t);
142
+
143
+ } else if (RB_TYPE_P(right, T_ARRAY)) {
144
+
145
+ v_size = RARRAY_LEN(right);
146
+ v = NM_ALLOC_N(D, v_size);
147
+ if (s->dtype == nm::RUBYOBJ)
148
+ nm_register_values(reinterpret_cast<VALUE*>(v), v_size);
149
+
150
+ for (size_t m = 0; m < v_size; ++m) {
151
+ rubyval_to_cval(rb_ary_entry(right, m), s->dtype, &(v[m]));
152
+ }
153
+
154
+ } else {
155
+ v = reinterpret_cast<D*>(rubyobj_to_cval(right, NM_DTYPE(left)));
156
+ if (s->dtype == nm::RUBYOBJ)
157
+ nm_register_values(reinterpret_cast<VALUE*>(v), v_size);
158
+ }
159
+
160
+ if (slice->single) {
161
+ reinterpret_cast<D*>(s->elements)[nm_dense_storage_pos(s, slice->coords)] = *v;
162
+ } else {
163
+ size_t v_offset = 0;
164
+ slice_set(s, slice->lengths, nm_dense_storage_pos(s, slice->coords), 0, v, v_size, v_offset);
165
+ }
166
+
167
+ // Only free v if it was allocated in this function.
168
+ if (nm_and_free.first) {
169
+ if (nm_and_free.second) {
170
+ nm_delete(nm_and_free.first);
171
+ }
172
+ } else {
173
+ if (s->dtype == nm::RUBYOBJ)
174
+ nm_unregister_values(reinterpret_cast<VALUE*>(v), v_size);
175
+ NM_FREE(v);
176
+ }
177
+ NM_CONSERVATIVE(nm_unregister_value(&left));
178
+ NM_CONSERVATIVE(nm_unregister_value(&right));
179
+
180
+ }
181
+
182
+ }} // end of namespace nm::dense_storage
183
+
184
+
185
+ extern "C" {
186
+
187
+ static size_t* stride(size_t* shape, size_t dim);
188
+ static void slice_copy(DENSE_STORAGE *dest, const DENSE_STORAGE *src, size_t* lengths, size_t pdest, size_t psrc, size_t n);
189
+
190
+ /*
191
+ * Functions
192
+ */
193
+
194
+ ///////////////
195
+ // Lifecycle //
196
+ ///////////////
197
+
198
+
199
+ /*
200
+ * This creates a dummy with all the properties of dense storage, but no actual elements allocation.
201
+ *
202
+ * elements will be NULL when this function finishes. You can clean up with nm_dense_storage_delete, which will
203
+ * check for that NULL pointer before freeing elements.
204
+ */
205
+ static DENSE_STORAGE* nm_dense_storage_create_dummy(nm::dtype_t dtype, size_t* shape, size_t dim) {
206
+ DENSE_STORAGE* s = NM_ALLOC( DENSE_STORAGE );
207
+
208
+ s->dim = dim;
209
+ s->shape = shape;
210
+ s->dtype = dtype;
211
+
212
+ s->offset = NM_ALLOC_N(size_t, dim);
213
+ memset(s->offset, 0, sizeof(size_t)*dim);
214
+
215
+ s->stride = stride(shape, dim);
216
+ s->count = 1;
217
+ s->src = s;
218
+
219
+ s->elements = NULL;
220
+
221
+ return s;
222
+ }
223
+
224
+
225
+ /*
226
+ * Note that elements and elements_length are for initial value(s) passed in.
227
+ * If they are the correct length, they will be used directly. If not, they
228
+ * will be concatenated over and over again into a new elements array. If
229
+ * elements is NULL, the new elements array will not be initialized.
230
+ */
231
+ DENSE_STORAGE* nm_dense_storage_create(nm::dtype_t dtype, size_t* shape, size_t dim, void* elements, size_t elements_length) {
232
+ if (dtype == nm::RUBYOBJ)
233
+ nm_register_values(reinterpret_cast<VALUE*>(elements), elements_length);
234
+
235
+ DENSE_STORAGE* s = nm_dense_storage_create_dummy(dtype, shape, dim);
236
+ size_t count = nm_storage_count_max_elements(s);
237
+
238
+ if (elements_length == count) {
239
+ s->elements = elements;
240
+
241
+ if (dtype == nm::RUBYOBJ)
242
+ nm_unregister_values(reinterpret_cast<VALUE*>(elements), elements_length);
243
+
244
+ } else {
245
+
246
+ s->elements = NM_ALLOC_N(char, DTYPE_SIZES[dtype]*count);
247
+
248
+ if (dtype == nm::RUBYOBJ)
249
+ nm_unregister_values(reinterpret_cast<VALUE*>(elements), elements_length);
250
+
251
+ size_t copy_length = elements_length;
252
+
253
+ if (elements_length > 0) {
254
+ // Repeat elements over and over again until the end of the matrix.
255
+ for (size_t i = 0; i < count; i += elements_length) {
256
+
257
+ if (i + elements_length > count) {
258
+ copy_length = count - i;
259
+ }
260
+
261
+ memcpy((char*)(s->elements)+i*DTYPE_SIZES[dtype], (char*)(elements)+(i % elements_length)*DTYPE_SIZES[dtype], copy_length*DTYPE_SIZES[dtype]);
262
+ }
263
+
264
+ // Get rid of the init_val.
265
+ NM_FREE(elements);
266
+ }
267
+ }
268
+
269
+ return s;
270
+ }
271
+
272
+
273
+ /*
274
+ * Destructor for dense storage. Make sure when you update this you also update nm_dense_storage_delete_dummy.
275
+ */
276
+ void nm_dense_storage_delete(STORAGE* s) {
277
+ // Sometimes Ruby passes in NULL storage for some reason (probably on copy construction failure).
278
+ if (s) {
279
+ DENSE_STORAGE* storage = (DENSE_STORAGE*)s;
280
+ if(storage->count-- == 1) {
281
+ NM_FREE(storage->shape);
282
+ NM_FREE(storage->offset);
283
+ NM_FREE(storage->stride);
284
+ if (storage->elements != NULL) {// happens with dummy objects
285
+ NM_FREE(storage->elements);
286
+ }
287
+ NM_FREE(storage);
288
+ }
289
+ }
290
+ }
291
+
292
+ /*
293
+ * Destructor for dense storage references (slicing).
294
+ */
295
+ void nm_dense_storage_delete_ref(STORAGE* s) {
296
+ // Sometimes Ruby passes in NULL storage for some reason (probably on copy construction failure).
297
+ if (s) {
298
+ DENSE_STORAGE* storage = (DENSE_STORAGE*)s;
299
+ nm_dense_storage_delete( reinterpret_cast<STORAGE*>(storage->src) );
300
+ NM_FREE(storage->shape);
301
+ NM_FREE(storage->offset);
302
+ NM_FREE(storage);
303
+ }
304
+ }
305
+
306
+ /*
307
+ * Mark values in a dense matrix for garbage collection. This may not be necessary -- further testing required.
308
+ */
309
+ void nm_dense_storage_mark(STORAGE* storage_base) {
310
+
311
+ DENSE_STORAGE* storage = (DENSE_STORAGE*)storage_base;
312
+
313
+ if (storage && storage->dtype == nm::RUBYOBJ) {
314
+ VALUE* els = reinterpret_cast<VALUE*>(storage->elements);
315
+
316
+ if (els) {
317
+ rb_gc_mark_locations(els, &(els[nm_storage_count_max_elements(storage)-1]));
318
+ }
319
+ //for (size_t index = nm_storage_count_max_elements(storage); index-- > 0;) {
320
+ // rb_gc_mark(els[index]);
321
+ //}
322
+ }
323
+ }
324
+
325
+ /**
326
+ * Register a dense storage struct as in-use to avoid garbage collection of the
327
+ * elements stored.
328
+ *
329
+ * This function will check dtype and ignore non-object dtype, so its safe to pass any dense storage in.
330
+ *
331
+ */
332
+ void nm_dense_storage_register(const STORAGE* s) {
333
+ const DENSE_STORAGE* storage = reinterpret_cast<const DENSE_STORAGE*>(s);
334
+ if (storage->dtype == nm::RUBYOBJ && storage->elements) {
335
+ nm_register_values(reinterpret_cast<VALUE*>(storage->elements), nm_storage_count_max_elements(storage));
336
+ }
337
+ }
338
+
339
+ /**
340
+ * Unregister a dense storage struct to allow normal garbage collection of the
341
+ * elements stored.
342
+ *
343
+ * This function will check dtype and ignore non-object dtype, so its safe to pass any dense storage in.
344
+ *
345
+ */
346
+ void nm_dense_storage_unregister(const STORAGE* s) {
347
+ const DENSE_STORAGE* storage = reinterpret_cast<const DENSE_STORAGE*>(s);
348
+ if (storage->dtype == nm::RUBYOBJ && storage->elements) {
349
+ nm_unregister_values(reinterpret_cast<VALUE*>(storage->elements), nm_storage_count_max_elements(storage));
350
+ }
351
+ }
352
+
353
+ ///////////////
354
+ // Accessors //
355
+ ///////////////
356
+
357
+
358
+
359
+ /*
360
+ * map_pair iterator for dense matrices (for element-wise operations)
361
+ */
362
+ VALUE nm_dense_map_pair(VALUE self, VALUE right) {
363
+
364
+ NM_CONSERVATIVE(nm_register_value(&self));
365
+ NM_CONSERVATIVE(nm_register_value(&right));
366
+
367
+ RETURN_SIZED_ENUMERATOR_PRE
368
+ NM_CONSERVATIVE(nm_unregister_value(&right));
369
+ NM_CONSERVATIVE(nm_unregister_value(&self));
370
+ RETURN_SIZED_ENUMERATOR(self, 0, 0, nm_enumerator_length);
371
+
372
+ DENSE_STORAGE *s = NM_STORAGE_DENSE(self),
373
+ *t = NM_STORAGE_DENSE(right);
374
+
375
+ size_t* coords = NM_ALLOCA_N(size_t, s->dim);
376
+ memset(coords, 0, sizeof(size_t) * s->dim);
377
+
378
+ size_t *shape_copy = NM_ALLOC_N(size_t, s->dim);
379
+ memcpy(shape_copy, s->shape, sizeof(size_t) * s->dim);
380
+
381
+ size_t count = nm_storage_count_max_elements(s);
382
+
383
+ DENSE_STORAGE* result = nm_dense_storage_create(nm::RUBYOBJ, shape_copy, s->dim, NULL, 0);
384
+
385
+ VALUE* result_elem = reinterpret_cast<VALUE*>(result->elements);
386
+ nm_dense_storage_register(result);
387
+
388
+ for (size_t k = 0; k < count; ++k) {
389
+ nm_dense_storage_coords(result, k, coords);
390
+ size_t s_index = nm_dense_storage_pos(s, coords),
391
+ t_index = nm_dense_storage_pos(t, coords);
392
+
393
+ VALUE sval = NM_DTYPE(self) == nm::RUBYOBJ ? reinterpret_cast<VALUE*>(s->elements)[s_index] : nm::rubyobj_from_cval((char*)(s->elements) + s_index*DTYPE_SIZES[NM_DTYPE(self)], NM_DTYPE(self)).rval;
394
+ nm_register_value(&sval);
395
+ VALUE tval = NM_DTYPE(right) == nm::RUBYOBJ ? reinterpret_cast<VALUE*>(t->elements)[t_index] : nm::rubyobj_from_cval((char*)(t->elements) + t_index*DTYPE_SIZES[NM_DTYPE(right)], NM_DTYPE(right)).rval;
396
+ result_elem[k] = rb_yield_values(2, sval, tval);
397
+ nm_unregister_value(&sval);
398
+ }
399
+
400
+ VALUE klass = CLASS_OF(self);
401
+ NMATRIX* m = nm_create(nm::DENSE_STORE, reinterpret_cast<STORAGE*>(result));
402
+ nm_register_nmatrix(m);
403
+ VALUE to_return = Data_Wrap_Struct(klass, nm_mark, nm_delete, m);
404
+
405
+ nm_unregister_nmatrix(m);
406
+ nm_dense_storage_unregister(result);
407
+ NM_CONSERVATIVE(nm_unregister_value(&self));
408
+ NM_CONSERVATIVE(nm_unregister_value(&right));
409
+
410
+ return to_return;
411
+
412
+ }
413
+
414
+ /*
415
+ * map enumerator for dense matrices.
416
+ */
417
+ VALUE nm_dense_map(VALUE self) {
418
+
419
+ NM_CONSERVATIVE(nm_register_value(&self));
420
+
421
+ RETURN_SIZED_ENUMERATOR_PRE
422
+ NM_CONSERVATIVE(nm_unregister_value(&self));
423
+ RETURN_SIZED_ENUMERATOR(self, 0, 0, nm_enumerator_length);
424
+
425
+ DENSE_STORAGE *s = NM_STORAGE_DENSE(self);
426
+
427
+ size_t* coords = NM_ALLOCA_N(size_t, s->dim);
428
+ memset(coords, 0, sizeof(size_t) * s->dim);
429
+
430
+ size_t *shape_copy = NM_ALLOC_N(size_t, s->dim);
431
+ memcpy(shape_copy, s->shape, sizeof(size_t) * s->dim);
432
+
433
+ size_t count = nm_storage_count_max_elements(s);
434
+
435
+ DENSE_STORAGE* result = nm_dense_storage_create(nm::RUBYOBJ, shape_copy, s->dim, NULL, 0);
436
+
437
+ VALUE* result_elem = reinterpret_cast<VALUE*>(result->elements);
438
+
439
+ nm_dense_storage_register(result);
440
+
441
+ for (size_t k = 0; k < count; ++k) {
442
+ nm_dense_storage_coords(result, k, coords);
443
+ size_t s_index = nm_dense_storage_pos(s, coords);
444
+
445
+ result_elem[k] = rb_yield(NM_DTYPE(self) == nm::RUBYOBJ ? reinterpret_cast<VALUE*>(s->elements)[s_index] : nm::rubyobj_from_cval((char*)(s->elements) + s_index*DTYPE_SIZES[NM_DTYPE(self)], NM_DTYPE(self)).rval);
446
+ }
447
+
448
+ VALUE klass = CLASS_OF(self);
449
+
450
+ NMATRIX* m = nm_create(nm::DENSE_STORE, reinterpret_cast<STORAGE*>(result));
451
+ nm_register_nmatrix(m);
452
+
453
+ VALUE to_return = Data_Wrap_Struct(klass, nm_mark, nm_delete, m);
454
+
455
+ nm_unregister_nmatrix(m);
456
+ nm_dense_storage_unregister(result);
457
+ NM_CONSERVATIVE(nm_unregister_value(&self));
458
+
459
+ return to_return;
460
+ }
461
+
462
+
463
+ /*
464
+ * each_with_indices iterator for dense matrices.
465
+ */
466
+ VALUE nm_dense_each_with_indices(VALUE nmatrix) {
467
+
468
+ NM_CONSERVATIVE(nm_register_value(&nmatrix));
469
+
470
+ RETURN_SIZED_ENUMERATOR_PRE
471
+ NM_CONSERVATIVE(nm_unregister_value(&nmatrix));
472
+ RETURN_SIZED_ENUMERATOR(nmatrix, 0, 0, nm_enumerator_length); // fourth argument only used by Ruby2+
473
+ DENSE_STORAGE* s = NM_STORAGE_DENSE(nmatrix);
474
+
475
+ // Create indices and initialize them to zero
476
+ size_t* coords = NM_ALLOCA_N(size_t, s->dim);
477
+ memset(coords, 0, sizeof(size_t) * s->dim);
478
+
479
+ size_t slice_index;
480
+ size_t* shape_copy = NM_ALLOC_N(size_t, s->dim);
481
+ memcpy(shape_copy, s->shape, sizeof(size_t) * s->dim);
482
+
483
+ DENSE_STORAGE* sliced_dummy = nm_dense_storage_create_dummy(s->dtype, shape_copy, s->dim);
484
+
485
+ for (size_t k = 0; k < nm_storage_count_max_elements(s); ++k) {
486
+ nm_dense_storage_coords(sliced_dummy, k, coords);
487
+ slice_index = nm_dense_storage_pos(s, coords);
488
+ VALUE ary = rb_ary_new();
489
+ nm_register_value(&ary);
490
+ if (NM_DTYPE(nmatrix) == nm::RUBYOBJ) rb_ary_push(ary, reinterpret_cast<VALUE*>(s->elements)[slice_index]);
491
+ else rb_ary_push(ary, nm::rubyobj_from_cval((char*)(s->elements) + slice_index*DTYPE_SIZES[NM_DTYPE(nmatrix)], NM_DTYPE(nmatrix)).rval);
492
+
493
+ for (size_t p = 0; p < s->dim; ++p) {
494
+ rb_ary_push(ary, INT2FIX(coords[p]));
495
+ }
496
+
497
+ // yield the array which now consists of the value and the indices
498
+ rb_yield(ary);
499
+ nm_unregister_value(&ary);
500
+ }
501
+
502
+ nm_dense_storage_delete(sliced_dummy);
503
+
504
+ NM_CONSERVATIVE(nm_unregister_value(&nmatrix));
505
+
506
+ return nmatrix;
507
+
508
+ }
509
+
510
+
511
+ /*
512
+ * Borrowed this function from NArray. Handles 'each' iteration on a dense
513
+ * matrix.
514
+ *
515
+ * Additionally, handles separately matrices containing VALUEs and matrices
516
+ * containing other types of data.
517
+ */
518
+ VALUE nm_dense_each(VALUE nmatrix) {
519
+
520
+ NM_CONSERVATIVE(nm_register_value(&nmatrix));
521
+
522
+ RETURN_SIZED_ENUMERATOR_PRE
523
+ NM_CONSERVATIVE(nm_unregister_value(&nmatrix));
524
+ RETURN_SIZED_ENUMERATOR(nmatrix, 0, 0, nm_enumerator_length);
525
+
526
+ DENSE_STORAGE* s = NM_STORAGE_DENSE(nmatrix);
527
+
528
+ size_t* temp_coords = NM_ALLOCA_N(size_t, s->dim);
529
+ size_t sliced_index;
530
+ size_t* shape_copy = NM_ALLOC_N(size_t, s->dim);
531
+ memcpy(shape_copy, s->shape, sizeof(size_t) * s->dim);
532
+ DENSE_STORAGE* sliced_dummy = nm_dense_storage_create_dummy(s->dtype, shape_copy, s->dim);
533
+
534
+ if (NM_DTYPE(nmatrix) == nm::RUBYOBJ) {
535
+
536
+ // matrix of Ruby objects -- yield those objects directly
537
+ for (size_t i = 0; i < nm_storage_count_max_elements(s); ++i) {
538
+ nm_dense_storage_coords(sliced_dummy, i, temp_coords);
539
+ sliced_index = nm_dense_storage_pos(s, temp_coords);
540
+ rb_yield( reinterpret_cast<VALUE*>(s->elements)[sliced_index] );
541
+ }
542
+
543
+ } else {
544
+
545
+ // We're going to copy the matrix element into a Ruby VALUE and then operate on it. This way user can't accidentally
546
+ // modify it and cause a seg fault.
547
+ for (size_t i = 0; i < nm_storage_count_max_elements(s); ++i) {
548
+ nm_dense_storage_coords(sliced_dummy, i, temp_coords);
549
+ sliced_index = nm_dense_storage_pos(s, temp_coords);
550
+ VALUE v = nm::rubyobj_from_cval((char*)(s->elements) + sliced_index*DTYPE_SIZES[NM_DTYPE(nmatrix)], NM_DTYPE(nmatrix)).rval;
551
+ rb_yield( v ); // yield to the copy we made
552
+ }
553
+ }
554
+
555
+ nm_dense_storage_delete(sliced_dummy);
556
+ NM_CONSERVATIVE(nm_unregister_value(&nmatrix));
557
+
558
+ return nmatrix;
559
+
560
+ }
561
+
562
+
563
+ /*
564
+ * Non-templated version of nm::dense_storage::slice_copy
565
+ */
566
+ static void slice_copy(DENSE_STORAGE *dest, const DENSE_STORAGE *src, size_t* lengths, size_t pdest, size_t psrc, size_t n) {
567
+ NAMED_LR_DTYPE_TEMPLATE_TABLE(slice_copy_table, nm::dense_storage::slice_copy, void, DENSE_STORAGE*, const DENSE_STORAGE*, size_t*, size_t, size_t, size_t)
568
+
569
+ slice_copy_table[dest->dtype][src->dtype](dest, src, lengths, pdest, psrc, n);
570
+ }
571
+
572
+
573
+ /*
574
+ * Get a slice or one element, using copying.
575
+ *
576
+ * FIXME: Template the first condition.
577
+ */
578
+ void* nm_dense_storage_get(const STORAGE* storage, SLICE* slice) {
579
+ DENSE_STORAGE* s = (DENSE_STORAGE*)storage;
580
+ if (slice->single)
581
+ return (char*)(s->elements) + nm_dense_storage_pos(s, slice->coords) * DTYPE_SIZES[s->dtype];
582
+ else {
583
+ nm_dense_storage_register(s);
584
+ size_t *shape = NM_ALLOC_N(size_t, s->dim);
585
+ for (size_t i = 0; i < s->dim; ++i) {
586
+ shape[i] = slice->lengths[i];
587
+ }
588
+
589
+ DENSE_STORAGE* ns = nm_dense_storage_create(s->dtype, shape, s->dim, NULL, 0);
590
+
591
+ slice_copy(ns,
592
+ reinterpret_cast<const DENSE_STORAGE*>(s->src),
593
+ slice->lengths,
594
+ 0,
595
+ nm_dense_storage_pos(s, slice->coords),
596
+ 0);
597
+
598
+ nm_dense_storage_unregister(s);
599
+ return ns;
600
+ }
601
+ }
602
+
603
+ /*
604
+ * Get a slice or one element by reference (no copy).
605
+ *
606
+ * FIXME: Template the first condition.
607
+ */
608
+ void* nm_dense_storage_ref(const STORAGE* storage, SLICE* slice) {
609
+ DENSE_STORAGE* s = (DENSE_STORAGE*)storage;
610
+
611
+ if (slice->single)
612
+ return (char*)(s->elements) + nm_dense_storage_pos(s, slice->coords) * DTYPE_SIZES[s->dtype];
613
+
614
+ else {
615
+ nm_dense_storage_register(s);
616
+ DENSE_STORAGE* ns = NM_ALLOC( DENSE_STORAGE );
617
+ ns->dim = s->dim;
618
+ ns->dtype = s->dtype;
619
+ ns->offset = NM_ALLOC_N(size_t, ns->dim);
620
+ ns->shape = NM_ALLOC_N(size_t, ns->dim);
621
+
622
+ for (size_t i = 0; i < ns->dim; ++i) {
623
+ ns->offset[i] = slice->coords[i] + s->offset[i];
624
+ ns->shape[i] = slice->lengths[i];
625
+ }
626
+
627
+ ns->stride = s->stride;
628
+ ns->elements = s->elements;
629
+
630
+ s->src->count++;
631
+ ns->src = s->src;
632
+
633
+ nm_dense_storage_unregister(s);
634
+ return ns;
635
+ }
636
+ }
637
+
638
+
639
+
640
+
641
+ /*
642
+ * Set a value or values in a dense matrix. Requires that right be either a single value or an NMatrix (ref or real).
643
+ */
644
+ void nm_dense_storage_set(VALUE left, SLICE* slice, VALUE right) {
645
+ NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::dense_storage::set, void, VALUE, SLICE*, VALUE)
646
+ nm::dtype_t dtype = NM_DTYPE(left);
647
+ ttable[dtype](left, slice, right);
648
+ }
649
+
650
+
651
+ ///////////
652
+ // Tests //
653
+ ///////////
654
+
655
+ /*
656
+ * Do these two dense matrices have the same contents?
657
+ *
658
+ * TODO: Test the shape of the two matrices.
659
+ * TODO: See if using memcmp is faster when the left- and right-hand matrices
660
+ * have the same dtype.
661
+ */
662
+ bool nm_dense_storage_eqeq(const STORAGE* left, const STORAGE* right) {
663
+ LR_DTYPE_TEMPLATE_TABLE(nm::dense_storage::eqeq, bool, const DENSE_STORAGE*, const DENSE_STORAGE*)
664
+
665
+ if (!ttable[left->dtype][right->dtype]) {
666
+ rb_raise(nm_eDataTypeError, "comparison between these dtypes is undefined");
667
+ return false;
668
+ }
669
+
670
+ return ttable[left->dtype][right->dtype]((const DENSE_STORAGE*)left, (const DENSE_STORAGE*)right);
671
+ }
672
+
673
+ /*
674
+ * Test to see if the matrix is Hermitian. If the matrix does not have a
675
+ * dtype of Complex64 or Complex128 this is the same as testing for symmetry.
676
+ */
677
+ bool nm_dense_storage_is_hermitian(const DENSE_STORAGE* mat, int lda) {
678
+ if (mat->dtype == nm::COMPLEX64) {
679
+ return nm::dense_storage::is_hermitian<nm::Complex64>(mat, lda);
680
+
681
+ } else if (mat->dtype == nm::COMPLEX128) {
682
+ return nm::dense_storage::is_hermitian<nm::Complex128>(mat, lda);
683
+
684
+ } else {
685
+ return nm_dense_storage_is_symmetric(mat, lda);
686
+ }
687
+ }
688
+
689
+ /*
690
+ * Is this dense matrix symmetric about the diagonal?
691
+ */
692
+ bool nm_dense_storage_is_symmetric(const DENSE_STORAGE* mat, int lda) {
693
+ DTYPE_TEMPLATE_TABLE(nm::dense_storage::is_symmetric, bool, const DENSE_STORAGE*, int);
694
+
695
+ return ttable[mat->dtype](mat, lda);
696
+ }
697
+
698
+ //////////
699
+ // Math //
700
+ //////////
701
+
702
+
703
+ /*
704
+ * Dense matrix-matrix multiplication.
705
+ */
706
+ STORAGE* nm_dense_storage_matrix_multiply(const STORAGE_PAIR& casted_storage, size_t* resulting_shape, bool vector) {
707
+ DTYPE_TEMPLATE_TABLE(nm::dense_storage::matrix_multiply, DENSE_STORAGE*, const STORAGE_PAIR& casted_storage, size_t* resulting_shape, bool vector);
708
+
709
+ return ttable[casted_storage.left->dtype](casted_storage, resulting_shape, vector);
710
+ }
711
+
712
+ /////////////
713
+ // Utility //
714
+ /////////////
715
+
716
+ /*
717
+ * Determine the linear array position (in elements of s) of some set of coordinates
718
+ * (given by slice).
719
+ */
720
+ size_t nm_dense_storage_pos(const DENSE_STORAGE* s, const size_t* coords) {
721
+ size_t pos = 0;
722
+
723
+ for (size_t i = 0; i < s->dim; ++i)
724
+ pos += (coords[i] + s->offset[i]) * s->stride[i];
725
+
726
+ return pos;
727
+
728
+ }
729
+
730
+ /*
731
+ * Determine the a set of slice coordinates from linear array position (in elements
732
+ * of s) of some set of coordinates (given by slice). (Inverse of
733
+ * nm_dense_storage_pos).
734
+ *
735
+ * The parameter coords_out should be a pre-allocated array of size equal to s->dim.
736
+ */
737
+ void nm_dense_storage_coords(const DENSE_STORAGE* s, const size_t slice_pos, size_t* coords_out) {
738
+
739
+ size_t temp_pos = slice_pos;
740
+
741
+ for (size_t i = 0; i < s->dim; ++i) {
742
+ coords_out[i] = (temp_pos - temp_pos % s->stride[i])/s->stride[i] - s->offset[i];
743
+ temp_pos = temp_pos % s->stride[i];
744
+ }
745
+ }
746
+
747
+ /*
748
+ * Calculate the stride length.
749
+ */
750
+ static size_t* stride(size_t* shape, size_t dim) {
751
+ size_t i, j;
752
+ size_t* stride = NM_ALLOC_N(size_t, dim);
753
+
754
+ for (i = 0; i < dim; ++i) {
755
+ stride[i] = 1;
756
+ for (j = i+1; j < dim; ++j) {
757
+ stride[i] *= shape[j];
758
+ }
759
+ }
760
+
761
+ return stride;
762
+ }
763
+
764
+
765
+ /////////////////////////
766
+ // Copying and Casting //
767
+ /////////////////////////
768
+
769
+ /*
770
+ * Copy dense storage, changing dtype if necessary.
771
+ */
772
+ STORAGE* nm_dense_storage_cast_copy(const STORAGE* rhs, nm::dtype_t new_dtype, void* dummy) {
773
+ NAMED_LR_DTYPE_TEMPLATE_TABLE(ttable, nm::dense_storage::cast_copy, DENSE_STORAGE*, const DENSE_STORAGE* rhs, nm::dtype_t new_dtype);
774
+
775
+ if (!ttable[new_dtype][rhs->dtype]) {
776
+ rb_raise(nm_eDataTypeError, "cast between these dtypes is undefined");
777
+ return NULL;
778
+ }
779
+
780
+ return (STORAGE*)ttable[new_dtype][rhs->dtype]((DENSE_STORAGE*)rhs, new_dtype);
781
+ }
782
+
783
+ /*
784
+ * Copy dense storage without a change in dtype.
785
+ */
786
+ DENSE_STORAGE* nm_dense_storage_copy(const DENSE_STORAGE* rhs) {
787
+ nm_dense_storage_register(rhs);
788
+
789
+ size_t count = 0;
790
+ size_t *shape = NM_ALLOC_N(size_t, rhs->dim);
791
+
792
+ // copy shape and offset
793
+ for (size_t i = 0; i < rhs->dim; ++i) {
794
+ shape[i] = rhs->shape[i];
795
+ }
796
+
797
+ DENSE_STORAGE* lhs = nm_dense_storage_create(rhs->dtype, shape, rhs->dim, NULL, 0);
798
+ count = nm_storage_count_max_elements(lhs);
799
+
800
+
801
+ // Ensure that allocation worked before copying.
802
+ if (lhs && count) {
803
+ if (rhs == rhs->src) // not a reference
804
+ memcpy(lhs->elements, rhs->elements, DTYPE_SIZES[rhs->dtype] * count);
805
+ else { // slice whole matrix
806
+ nm_dense_storage_register(lhs);
807
+ size_t *offset = NM_ALLOC_N(size_t, rhs->dim);
808
+ memset(offset, 0, sizeof(size_t) * rhs->dim);
809
+
810
+ slice_copy(lhs,
811
+ reinterpret_cast<const DENSE_STORAGE*>(rhs->src),
812
+ rhs->shape,
813
+ 0,
814
+ nm_dense_storage_pos(rhs, offset),
815
+ 0);
816
+
817
+ nm_dense_storage_unregister(lhs);
818
+ }
819
+ }
820
+
821
+ nm_dense_storage_unregister(rhs);
822
+
823
+ return lhs;
824
+ }
825
+
826
+
827
+ /*
828
+ * Transpose dense storage into a new dense storage object. Basically a copy constructor.
829
+ *
830
+ * Not much point in templating this as it's pretty straight-forward.
831
+ */
832
+ STORAGE* nm_dense_storage_copy_transposed(const STORAGE* rhs_base) {
833
+ DENSE_STORAGE* rhs = (DENSE_STORAGE*)rhs_base;
834
+
835
+ nm_dense_storage_register(rhs);
836
+ size_t *shape = NM_ALLOC_N(size_t, rhs->dim);
837
+
838
+ // swap shape
839
+ shape[0] = rhs->shape[1];
840
+ shape[1] = rhs->shape[0];
841
+
842
+ DENSE_STORAGE *lhs = nm_dense_storage_create(rhs->dtype, shape, rhs->dim, NULL, 0);
843
+
844
+ nm_dense_storage_register(lhs);
845
+
846
+ if (rhs_base->src == rhs_base) {
847
+ nm_math_transpose_generic(rhs->shape[0], rhs->shape[1], rhs->elements, rhs->shape[1], lhs->elements, lhs->shape[1], DTYPE_SIZES[rhs->dtype]);
848
+ } else {
849
+ NAMED_LR_DTYPE_TEMPLATE_TABLE(ttable, nm::dense_storage::ref_slice_copy_transposed, void, const DENSE_STORAGE* rhs, DENSE_STORAGE* lhs);
850
+
851
+ if (!ttable[lhs->dtype][rhs->dtype]) {
852
+ nm_dense_storage_unregister(rhs);
853
+ nm_dense_storage_unregister(lhs);
854
+ rb_raise(nm_eDataTypeError, "transposition between these dtypes is undefined");
855
+ }
856
+
857
+ ttable[lhs->dtype][rhs->dtype](rhs, lhs);
858
+ }
859
+
860
+ nm_dense_storage_unregister(rhs);
861
+ nm_dense_storage_unregister(lhs);
862
+
863
+ return (STORAGE*)lhs;
864
+ }
865
+
866
+ } // end of extern "C" block
867
+
868
+ namespace nm {
869
+
870
+ /*
871
+ * Used for slice setting. Takes the right-hand of the equal sign, a single VALUE, and massages
872
+ * it into the correct form if it's not already there (dtype, non-ref, dense). Returns a pair of the NMATRIX* and a
873
+ * boolean. If the boolean is true, the calling function is responsible for calling nm_delete on the NMATRIX*.
874
+ * Otherwise, the NMATRIX* still belongs to Ruby and Ruby will free it.
875
+ */
876
+ std::pair<NMATRIX*,bool> interpret_arg_as_dense_nmatrix(VALUE right, nm::dtype_t dtype) {
877
+ NM_CONSERVATIVE(nm_register_value(&right));
878
+ if (IsNMatrixType(right)) {
879
+ NMATRIX *r;
880
+ if (NM_STYPE(right) != DENSE_STORE || NM_DTYPE(right) != dtype || NM_SRC(right) != NM_STORAGE(right)) {
881
+ UnwrapNMatrix( right, r );
882
+ NMATRIX* ldtype_r = nm_cast_with_ctype_args(r, nm::DENSE_STORE, dtype, NULL);
883
+ NM_CONSERVATIVE(nm_unregister_value(&right));
884
+ return std::make_pair(ldtype_r,true);
885
+ } else { // simple case -- right-hand matrix is dense and is not a reference and has same dtype
886
+ UnwrapNMatrix( right, r );
887
+ NM_CONSERVATIVE(nm_unregister_value(&right));
888
+ return std::make_pair(r, false);
889
+ }
890
+ // Do not set v_alloc = true for either of these. It is the responsibility of r/ldtype_r
891
+ } else if (RB_TYPE_P(right, T_DATA)) {
892
+ NM_CONSERVATIVE(nm_unregister_value(&right));
893
+ rb_raise(rb_eTypeError, "unrecognized type for slice assignment");
894
+ }
895
+
896
+ NM_CONSERVATIVE(nm_unregister_value(&right));
897
+ return std::make_pair<NMATRIX*,bool>(NULL, false);
898
+ }
899
+
900
+
901
+ namespace dense_storage {
902
+
903
+ /////////////////////////
904
+ // Templated Functions //
905
+ /////////////////////////
906
+
907
+ template<typename LDType, typename RDType>
908
+ void ref_slice_copy_transposed(const DENSE_STORAGE* rhs, DENSE_STORAGE* lhs) {
909
+
910
+ nm_dense_storage_register(rhs);
911
+ nm_dense_storage_register(lhs);
912
+
913
+ LDType* lhs_els = reinterpret_cast<LDType*>(lhs->elements);
914
+ RDType* rhs_els = reinterpret_cast<RDType*>(rhs->elements);
915
+ size_t count = nm_storage_count_max_elements(lhs);;
916
+ size_t* temp_coords = NM_ALLOCA_N(size_t, lhs->dim);
917
+ size_t coord_swap_temp;
918
+
919
+ while (count-- > 0) {
920
+ nm_dense_storage_coords(lhs, count, temp_coords);
921
+ NM_SWAP(temp_coords[0], temp_coords[1], coord_swap_temp);
922
+ size_t r_coord = nm_dense_storage_pos(rhs, temp_coords);
923
+ lhs_els[count] = rhs_els[r_coord];
924
+ }
925
+
926
+ nm_dense_storage_unregister(rhs);
927
+ nm_dense_storage_unregister(lhs);
928
+
929
+ }
930
+
931
+ template <typename LDType, typename RDType>
932
+ DENSE_STORAGE* cast_copy(const DENSE_STORAGE* rhs, dtype_t new_dtype) {
933
+ nm_dense_storage_register(rhs);
934
+
935
+ size_t count = nm_storage_count_max_elements(rhs);
936
+
937
+ size_t *shape = NM_ALLOC_N(size_t, rhs->dim);
938
+ memcpy(shape, rhs->shape, sizeof(size_t) * rhs->dim);
939
+
940
+ DENSE_STORAGE* lhs = nm_dense_storage_create(new_dtype, shape, rhs->dim, NULL, 0);
941
+
942
+ nm_dense_storage_register(lhs);
943
+
944
+ // Ensure that allocation worked before copying.
945
+ if (lhs && count) {
946
+ if (rhs->src != rhs) { // Make a copy of a ref to a matrix.
947
+ size_t* offset = NM_ALLOCA_N(size_t, rhs->dim);
948
+ memset(offset, 0, sizeof(size_t) * rhs->dim);
949
+
950
+ slice_copy(lhs, reinterpret_cast<const DENSE_STORAGE*>(rhs->src),
951
+ rhs->shape, 0,
952
+ nm_dense_storage_pos(rhs, offset), 0);
953
+
954
+ } else { // Make a regular copy.
955
+ RDType* rhs_els = reinterpret_cast<RDType*>(rhs->elements);
956
+ LDType* lhs_els = reinterpret_cast<LDType*>(lhs->elements);
957
+
958
+ for (size_t i = 0; i < count; ++i)
959
+ lhs_els[i] = rhs_els[i];
960
+ }
961
+ }
962
+
963
+ nm_dense_storage_unregister(rhs);
964
+ nm_dense_storage_unregister(lhs);
965
+
966
+ return lhs;
967
+ }
968
+
969
+ template <typename LDType, typename RDType>
970
+ bool eqeq(const DENSE_STORAGE* left, const DENSE_STORAGE* right) {
971
+ nm_dense_storage_register(left);
972
+ nm_dense_storage_register(right);
973
+
974
+ size_t index;
975
+ DENSE_STORAGE *tmp1, *tmp2;
976
+ tmp1 = NULL; tmp2 = NULL;
977
+ bool result = true;
978
+ /* FIXME: Very strange behavior! The GC calls the method directly with non-initialized data. */
979
+
980
+ LDType* left_elements = (LDType*)left->elements;
981
+ RDType* right_elements = (RDType*)right->elements;
982
+
983
+ // Copy elements in temp matrix if you have reference to the right.
984
+ if (left->src != left) {
985
+ tmp1 = nm_dense_storage_copy(left);
986
+ nm_dense_storage_register(tmp1);
987
+ left_elements = (LDType*)tmp1->elements;
988
+ }
989
+ if (right->src != right) {
990
+ tmp2 = nm_dense_storage_copy(right);
991
+ nm_dense_storage_register(tmp2);
992
+ right_elements = (RDType*)tmp2->elements;
993
+ }
994
+
995
+
996
+
997
+ for (index = nm_storage_count_max_elements(left); index-- > 0;) {
998
+ if (left_elements[index] != right_elements[index]) {
999
+ result = false;
1000
+ break;
1001
+ }
1002
+ }
1003
+
1004
+ if (tmp1) {
1005
+ nm_dense_storage_unregister(tmp1);
1006
+ NM_FREE(tmp1);
1007
+ }
1008
+ if (tmp2) {
1009
+ nm_dense_storage_unregister(tmp2);
1010
+ NM_FREE(tmp2);
1011
+ }
1012
+
1013
+ nm_dense_storage_unregister(left);
1014
+ nm_dense_storage_unregister(right);
1015
+ return result;
1016
+ }
1017
+
1018
+ template <typename DType>
1019
+ bool is_hermitian(const DENSE_STORAGE* mat, int lda) {
1020
+ unsigned int i, j;
1021
+ DType complex_conj;
1022
+
1023
+ const DType* els = (DType*) mat->elements;
1024
+
1025
+ for (i = mat->shape[0]; i-- > 0;) {
1026
+ for (j = i + 1; j < mat->shape[1]; ++j) {
1027
+ complex_conj = els[j*lda + i];
1028
+ complex_conj.i = -complex_conj.i;
1029
+
1030
+ if (els[i*lda+j] != complex_conj) {
1031
+ return false;
1032
+ }
1033
+ }
1034
+ }
1035
+
1036
+ return true;
1037
+ }
1038
+
1039
+ template <typename DType>
1040
+ bool is_symmetric(const DENSE_STORAGE* mat, int lda) {
1041
+ unsigned int i, j;
1042
+ const DType* els = (DType*) mat->elements;
1043
+
1044
+ for (i = mat->shape[0]; i-- > 0;) {
1045
+ for (j = i + 1; j < mat->shape[1]; ++j) {
1046
+ if (els[i*lda+j] != els[j*lda+i]) {
1047
+ return false;
1048
+ }
1049
+ }
1050
+ }
1051
+
1052
+ return true;
1053
+ }
1054
+
1055
+
1056
+
1057
+ /*
1058
+ * DType-templated matrix-matrix multiplication for dense storage.
1059
+ */
1060
+ template <typename DType>
1061
+ static DENSE_STORAGE* matrix_multiply(const STORAGE_PAIR& casted_storage, size_t* resulting_shape, bool vector) {
1062
+ DENSE_STORAGE *left = (DENSE_STORAGE*)(casted_storage.left),
1063
+ *right = (DENSE_STORAGE*)(casted_storage.right);
1064
+
1065
+ nm_dense_storage_register(left);
1066
+ nm_dense_storage_register(right);
1067
+
1068
+ // Create result storage.
1069
+ DENSE_STORAGE* result = nm_dense_storage_create(left->dtype, resulting_shape, 2, NULL, 0);
1070
+
1071
+ nm_dense_storage_register(result);
1072
+
1073
+ DType *pAlpha = NM_ALLOCA_N(DType, 1),
1074
+ *pBeta = NM_ALLOCA_N(DType, 1);
1075
+
1076
+ *pAlpha = 1;
1077
+ *pBeta = 0;
1078
+ // Do the multiplication
1079
+ if (vector) nm::math::gemv<DType>(CblasNoTrans, left->shape[0], left->shape[1], pAlpha,
1080
+ reinterpret_cast<DType*>(left->elements), left->shape[1],
1081
+ reinterpret_cast<DType*>(right->elements), 1, pBeta,
1082
+ reinterpret_cast<DType*>(result->elements), 1);
1083
+ else nm::math::gemm<DType>(CblasRowMajor, CblasNoTrans, CblasNoTrans, left->shape[0], right->shape[1], left->shape[1],
1084
+ pAlpha, reinterpret_cast<DType*>(left->elements), left->shape[1],
1085
+ reinterpret_cast<DType*>(right->elements), right->shape[1], pBeta,
1086
+ reinterpret_cast<DType*>(result->elements), result->shape[1]);
1087
+
1088
+
1089
+ nm_dense_storage_unregister(left);
1090
+ nm_dense_storage_unregister(right);
1091
+ nm_dense_storage_unregister(result);
1092
+
1093
+ return result;
1094
+ }
1095
+
1096
+ }} // end of namespace nm::dense_storage