nmatrix 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +27 -0
- data/.rspec +2 -0
- data/Gemfile +3 -5
- data/Guardfile +6 -0
- data/History.txt +33 -0
- data/Manifest.txt +41 -38
- data/README.rdoc +88 -11
- data/Rakefile +35 -53
- data/ext/nmatrix/data/complex.h +372 -0
- data/ext/nmatrix/data/data.cpp +275 -0
- data/ext/nmatrix/data/data.h +707 -0
- data/ext/nmatrix/data/rational.h +421 -0
- data/ext/nmatrix/data/ruby_object.h +446 -0
- data/ext/nmatrix/extconf.rb +101 -51
- data/ext/nmatrix/new_extconf.rb +56 -0
- data/ext/nmatrix/nmatrix.cpp +1609 -0
- data/ext/nmatrix/nmatrix.h +265 -849
- data/ext/nmatrix/ruby_constants.cpp +134 -0
- data/ext/nmatrix/ruby_constants.h +103 -0
- data/ext/nmatrix/storage/common.cpp +70 -0
- data/ext/nmatrix/storage/common.h +170 -0
- data/ext/nmatrix/storage/dense.cpp +665 -0
- data/ext/nmatrix/storage/dense.h +116 -0
- data/ext/nmatrix/storage/list.cpp +1088 -0
- data/ext/nmatrix/storage/list.h +129 -0
- data/ext/nmatrix/storage/storage.cpp +658 -0
- data/ext/nmatrix/storage/storage.h +99 -0
- data/ext/nmatrix/storage/yale.cpp +1601 -0
- data/ext/nmatrix/storage/yale.h +208 -0
- data/ext/nmatrix/ttable_helper.rb +126 -0
- data/ext/nmatrix/{yale/smmp1_header.template.c → types.h} +36 -9
- data/ext/nmatrix/util/io.cpp +295 -0
- data/ext/nmatrix/util/io.h +117 -0
- data/ext/nmatrix/util/lapack.h +1175 -0
- data/ext/nmatrix/util/math.cpp +557 -0
- data/ext/nmatrix/util/math.h +1363 -0
- data/ext/nmatrix/util/sl_list.cpp +475 -0
- data/ext/nmatrix/util/sl_list.h +255 -0
- data/ext/nmatrix/util/util.h +78 -0
- data/lib/nmatrix/blas.rb +70 -0
- data/lib/nmatrix/io/mat5_reader.rb +567 -0
- data/lib/nmatrix/io/mat_reader.rb +162 -0
- data/lib/{string.rb → nmatrix/monkeys.rb} +49 -2
- data/lib/nmatrix/nmatrix.rb +199 -0
- data/lib/nmatrix/nvector.rb +103 -0
- data/lib/nmatrix/version.rb +27 -0
- data/lib/nmatrix.rb +22 -230
- data/nmatrix.gemspec +59 -0
- data/scripts/mac-brew-gcc.sh +47 -0
- data/spec/4x4_sparse.mat +0 -0
- data/spec/4x5_dense.mat +0 -0
- data/spec/blas_spec.rb +47 -0
- data/spec/elementwise_spec.rb +164 -0
- data/spec/io_spec.rb +60 -0
- data/spec/lapack_spec.rb +52 -0
- data/spec/math_spec.rb +96 -0
- data/spec/nmatrix_spec.rb +93 -89
- data/spec/nmatrix_yale_spec.rb +52 -36
- data/spec/nvector_spec.rb +1 -1
- data/spec/slice_spec.rb +257 -0
- data/spec/spec_helper.rb +51 -0
- data/spec/utm5940.mtx +83844 -0
- metadata +113 -71
- data/.autotest +0 -23
- data/.gemtest +0 -0
- data/ext/nmatrix/cblas.c +0 -150
- data/ext/nmatrix/dense/blas_header.template.c +0 -52
- data/ext/nmatrix/dense/elementwise.template.c +0 -107
- data/ext/nmatrix/dense/gemm.template.c +0 -159
- data/ext/nmatrix/dense/gemv.template.c +0 -130
- data/ext/nmatrix/dense/rationalmath.template.c +0 -68
- data/ext/nmatrix/dense.c +0 -307
- data/ext/nmatrix/depend +0 -18
- data/ext/nmatrix/generator/syntax_tree.rb +0 -481
- data/ext/nmatrix/generator.rb +0 -594
- data/ext/nmatrix/list.c +0 -774
- data/ext/nmatrix/nmatrix.c +0 -1977
- data/ext/nmatrix/rational.c +0 -98
- data/ext/nmatrix/yale/complexmath.template.c +0 -71
- data/ext/nmatrix/yale/elementwise.template.c +0 -46
- data/ext/nmatrix/yale/elementwise_op.template.c +0 -73
- data/ext/nmatrix/yale/numbmm.template.c +0 -94
- data/ext/nmatrix/yale/smmp1.template.c +0 -21
- data/ext/nmatrix/yale/smmp2.template.c +0 -43
- data/ext/nmatrix/yale/smmp2_header.template.c +0 -46
- data/ext/nmatrix/yale/sort_columns.template.c +0 -56
- data/ext/nmatrix/yale/symbmm.template.c +0 -54
- data/ext/nmatrix/yale/transp.template.c +0 -68
- data/ext/nmatrix/yale.c +0 -726
- data/lib/array.rb +0 -67
- data/spec/syntax_tree_spec.rb +0 -46
|
@@ -0,0 +1,1601 @@
|
|
|
1
|
+
/////////////////////////////////////////////////////////////////////
|
|
2
|
+
// = NMatrix
|
|
3
|
+
//
|
|
4
|
+
// A linear algebra library for scientific computation in Ruby.
|
|
5
|
+
// NMatrix is part of SciRuby.
|
|
6
|
+
//
|
|
7
|
+
// NMatrix was originally inspired by and derived from NArray, by
|
|
8
|
+
// Masahiro Tanaka: http://narray.rubyforge.org
|
|
9
|
+
//
|
|
10
|
+
// == Copyright Information
|
|
11
|
+
//
|
|
12
|
+
// SciRuby is Copyright (c) 2010 - 2012, Ruby Science Foundation
|
|
13
|
+
// NMatrix is Copyright (c) 2012, Ruby Science Foundation
|
|
14
|
+
//
|
|
15
|
+
// Please see LICENSE.txt for additional copyright notices.
|
|
16
|
+
//
|
|
17
|
+
// == Contributing
|
|
18
|
+
//
|
|
19
|
+
// By contributing source code to SciRuby, you agree to be bound by
|
|
20
|
+
// our Contributor Agreement:
|
|
21
|
+
//
|
|
22
|
+
// * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement
|
|
23
|
+
//
|
|
24
|
+
// == yale.c
|
|
25
|
+
//
|
|
26
|
+
// "new yale" storage format for 2D matrices (like yale, but with
|
|
27
|
+
// the diagonal pulled out for O(1) access).
|
|
28
|
+
//
|
|
29
|
+
// Specifications:
|
|
30
|
+
// * dtype and index dtype must necessarily differ
|
|
31
|
+
// * index dtype is defined by whatever unsigned type can store
|
|
32
|
+
// max(rows,cols)
|
|
33
|
+
// * that means vector ija stores only index dtype, but a stores
|
|
34
|
+
// dtype
|
|
35
|
+
// * vectors must be able to grow as necessary
|
|
36
|
+
// * maximum size is rows*cols+1
|
|
37
|
+
|
|
38
|
+
/*
|
|
39
|
+
* Standard Includes
|
|
40
|
+
*/
|
|
41
|
+
|
|
42
|
+
#include <ruby.h>
|
|
43
|
+
#include <algorithm> // std::min
|
|
44
|
+
#include <cstdio> // std::fprintf
|
|
45
|
+
|
|
46
|
+
/*
|
|
47
|
+
* Project Includes
|
|
48
|
+
*/
|
|
49
|
+
|
|
50
|
+
// #include "types.h"
|
|
51
|
+
#include "util/math.h"
|
|
52
|
+
|
|
53
|
+
#include "data/data.h"
|
|
54
|
+
|
|
55
|
+
#include "common.h"
|
|
56
|
+
#include "yale.h"
|
|
57
|
+
|
|
58
|
+
#include "nmatrix.h"
|
|
59
|
+
#include "ruby_constants.h"
|
|
60
|
+
|
|
61
|
+
/*
|
|
62
|
+
* Macros
|
|
63
|
+
*/
|
|
64
|
+
#ifndef NM_MAX
|
|
65
|
+
#define NM_MAX(a,b) (((a)>(b))?(a):(b))
|
|
66
|
+
#define NM_MIN(a,b) (((a)<(b))?(a):(b))
|
|
67
|
+
#endif
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
/*
|
|
71
|
+
* Global Variables
|
|
72
|
+
*/
|
|
73
|
+
|
|
74
|
+
/*
|
|
75
|
+
* Forward Declarations
|
|
76
|
+
*/
|
|
77
|
+
|
|
78
|
+
extern "C" {
|
|
79
|
+
static YALE_STORAGE* nm_copy_alloc_struct(const YALE_STORAGE* rhs, const dtype_t new_dtype, const size_t new_capacity, const size_t new_size);
|
|
80
|
+
static YALE_STORAGE* alloc(dtype_t dtype, size_t* shape, size_t dim);
|
|
81
|
+
|
|
82
|
+
/* Ruby-accessible functions */
|
|
83
|
+
static VALUE nm_size(VALUE self);
|
|
84
|
+
static VALUE nm_a(VALUE self);
|
|
85
|
+
static VALUE nm_d(VALUE self);
|
|
86
|
+
static VALUE nm_lu(VALUE self);
|
|
87
|
+
static VALUE nm_ia(VALUE self);
|
|
88
|
+
static VALUE nm_ja(VALUE self);
|
|
89
|
+
static VALUE nm_ija(VALUE self);
|
|
90
|
+
|
|
91
|
+
} // end extern "C" block
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
namespace nm { namespace yale_storage {
|
|
96
|
+
|
|
97
|
+
template <typename DType, typename IType>
|
|
98
|
+
static bool ndrow_is_empty(const YALE_STORAGE* s, IType ija, const IType ija_next);
|
|
99
|
+
|
|
100
|
+
template <typename LDType, typename RDType, typename IType>
|
|
101
|
+
static bool ndrow_eqeq_ndrow(const YALE_STORAGE* l, const YALE_STORAGE* r, IType l_ija, const IType l_ija_next, IType r_ija, const IType r_ija_next);
|
|
102
|
+
|
|
103
|
+
template <typename LDType, typename RDType, typename IType>
|
|
104
|
+
static bool eqeq(const YALE_STORAGE* left, const YALE_STORAGE* right);
|
|
105
|
+
|
|
106
|
+
template <typename IType>
|
|
107
|
+
static YALE_STORAGE* copy_alloc_struct(const YALE_STORAGE* rhs, const dtype_t new_dtype, const size_t new_capacity, const size_t new_size);
|
|
108
|
+
|
|
109
|
+
template <typename IType>
|
|
110
|
+
static void increment_ia_after(YALE_STORAGE* s, IType ija_size, IType i, IType n);
|
|
111
|
+
|
|
112
|
+
template <typename IType>
|
|
113
|
+
static IType insert_search(YALE_STORAGE* s, IType left, IType right, IType key, bool* found);
|
|
114
|
+
|
|
115
|
+
template <typename IType>
|
|
116
|
+
static inline size_t get_size(const YALE_STORAGE* storage);
|
|
117
|
+
|
|
118
|
+
template <typename DType, typename IType>
|
|
119
|
+
static char vector_insert(YALE_STORAGE* s, size_t pos, size_t* j, DType* val, size_t n, bool struct_only);
|
|
120
|
+
|
|
121
|
+
template <typename DType, typename IType>
|
|
122
|
+
static char vector_insert_resize(YALE_STORAGE* s, size_t current_size, size_t pos, size_t* j, size_t n, bool struct_only);
|
|
123
|
+
|
|
124
|
+
template <typename nm::ewop_t op, typename IType, typename DType>
|
|
125
|
+
YALE_STORAGE* ew_op(const YALE_STORAGE* left, const YALE_STORAGE* right, dtype_t dtype);
|
|
126
|
+
|
|
127
|
+
/*
|
|
128
|
+
* Functions
|
|
129
|
+
*/
|
|
130
|
+
|
|
131
|
+
/*
|
|
132
|
+
* Create Yale storage from IA, JA, and A vectors given in Old Yale format (probably from a file, since NMatrix only uses
|
|
133
|
+
* new Yale for its storage).
|
|
134
|
+
*
|
|
135
|
+
* This function is needed for Matlab .MAT v5 IO.
|
|
136
|
+
*/
|
|
137
|
+
template <typename LDType, typename RDType, typename IType>
|
|
138
|
+
YALE_STORAGE* create_from_old_yale(dtype_t dtype, size_t* shape, void* r_ia, void* r_ja, void* r_a) {
|
|
139
|
+
IType* ir = reinterpret_cast<IType*>(r_ia);
|
|
140
|
+
IType* jr = reinterpret_cast<IType*>(r_ja);
|
|
141
|
+
RDType* ar = reinterpret_cast<RDType*>(r_a);
|
|
142
|
+
|
|
143
|
+
// Read through ia and ja and figure out the ndnz (non-diagonal non-zeros) count.
|
|
144
|
+
size_t ndnz = 0, i, p, p_next;
|
|
145
|
+
|
|
146
|
+
for (i = 0; i < shape[0]; ++i) { // Walk down rows
|
|
147
|
+
for (p = ir[i], p_next = ir[i+1]; p < p_next; ++p) { // Now walk through columns
|
|
148
|
+
|
|
149
|
+
if (i != jr[p]) ++ndnz; // entry is non-diagonal and probably nonzero
|
|
150
|
+
|
|
151
|
+
}
|
|
152
|
+
}
|
|
153
|
+
|
|
154
|
+
// Having walked through the matrix, we now go about allocating the space for it.
|
|
155
|
+
YALE_STORAGE* s = alloc(dtype, shape, 2);
|
|
156
|
+
|
|
157
|
+
s->capacity = shape[0] + ndnz + 1;
|
|
158
|
+
s->ndnz = ndnz;
|
|
159
|
+
|
|
160
|
+
// Setup IJA and A arrays
|
|
161
|
+
s->ija = ALLOC_N( IType, s->capacity );
|
|
162
|
+
s->a = ALLOC_N( LDType, s->capacity );
|
|
163
|
+
IType* ijl = reinterpret_cast<IType*>(s->ija);
|
|
164
|
+
LDType* al = reinterpret_cast<LDType*>(s->a);
|
|
165
|
+
|
|
166
|
+
// set the diagonal to zero -- this prevents uninitialized values from popping up.
|
|
167
|
+
for (size_t index = 0; index < shape[0]; ++index) {
|
|
168
|
+
al[index] = 0;
|
|
169
|
+
}
|
|
170
|
+
|
|
171
|
+
// Figure out where to start writing JA in IJA:
|
|
172
|
+
size_t pp = s->shape[0]+1;
|
|
173
|
+
|
|
174
|
+
// Find beginning of first row
|
|
175
|
+
p = ir[0];
|
|
176
|
+
|
|
177
|
+
// Now fill the arrays
|
|
178
|
+
for (i = 0; i < s->shape[0]; ++i) {
|
|
179
|
+
|
|
180
|
+
// Set the beginning of the row (of output)
|
|
181
|
+
ijl[i] = pp;
|
|
182
|
+
|
|
183
|
+
// Now walk through columns, starting at end of row (of input)
|
|
184
|
+
for (size_t p_next = ir[i+1]; p < p_next; ++p, ++pp) {
|
|
185
|
+
|
|
186
|
+
if (i == jr[p]) { // diagonal
|
|
187
|
+
|
|
188
|
+
al[i] = ar[p];
|
|
189
|
+
--pp;
|
|
190
|
+
|
|
191
|
+
} else { // nondiagonal
|
|
192
|
+
|
|
193
|
+
ijl[pp] = jr[p];
|
|
194
|
+
al[pp] = ar[p];
|
|
195
|
+
|
|
196
|
+
}
|
|
197
|
+
}
|
|
198
|
+
}
|
|
199
|
+
|
|
200
|
+
ijl[i] = pp; // Set the end of the last row
|
|
201
|
+
|
|
202
|
+
// Set the zero position for our output matrix
|
|
203
|
+
al[i] = 0;
|
|
204
|
+
|
|
205
|
+
return s;
|
|
206
|
+
}
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
/*
|
|
210
|
+
* Take two Yale storages and merge them into a new Yale storage.
|
|
211
|
+
*
|
|
212
|
+
* Uses the left as a template for the creation of a new one.
|
|
213
|
+
*/
|
|
214
|
+
template <typename DType, typename IType>
|
|
215
|
+
YALE_STORAGE* create_merged(const YALE_STORAGE* left, const YALE_STORAGE* right) {
|
|
216
|
+
char ins_type;
|
|
217
|
+
|
|
218
|
+
size_t size = get_size<IType>(left);
|
|
219
|
+
|
|
220
|
+
// s represents the resulting storage
|
|
221
|
+
YALE_STORAGE* s = copy_alloc_struct<IType>(left, left->dtype, NM_MAX(left->capacity, right->capacity), size);
|
|
222
|
+
|
|
223
|
+
IType* sija = reinterpret_cast<IType*>(s->ija);
|
|
224
|
+
IType* rija = reinterpret_cast<IType*>(right->ija);
|
|
225
|
+
|
|
226
|
+
// set the element between D and LU (the boundary in A), which should be 0.
|
|
227
|
+
reinterpret_cast<DType*>(s->a)[s->shape[0]] = reinterpret_cast<DType*>(left->a)[left->shape[0]];
|
|
228
|
+
|
|
229
|
+
if (right && right != left) {
|
|
230
|
+
// some operations are unary and don't need this; others are x+x and don't need this
|
|
231
|
+
|
|
232
|
+
for (IType i = 0; i < s->shape[0]; ++i) {
|
|
233
|
+
|
|
234
|
+
IType ija = sija[i];
|
|
235
|
+
IType ija_next = sija[i+1];
|
|
236
|
+
|
|
237
|
+
for (IType r_ija = rija[i]; r_ija < rija[i+1]; ++r_ija) {
|
|
238
|
+
|
|
239
|
+
size_t ja = sija[ija]; // insert expects a size_t
|
|
240
|
+
|
|
241
|
+
if (ija == ija_next) {
|
|
242
|
+
// destination row is empty
|
|
243
|
+
ins_type = vector_insert<DType,IType>(s, ija, &ja, NULL, 1, true);
|
|
244
|
+
increment_ia_after<IType>(s, s->shape[0], i, 1);
|
|
245
|
+
++(s->ndnz);
|
|
246
|
+
++ija;
|
|
247
|
+
|
|
248
|
+
if (ins_type == 'i') ++ija_next;
|
|
249
|
+
|
|
250
|
+
} else {
|
|
251
|
+
bool found;
|
|
252
|
+
|
|
253
|
+
// merge positions into destination row
|
|
254
|
+
IType pos = insert_search<IType>(s, ija, ija_next-1, sija[ija], &found);
|
|
255
|
+
|
|
256
|
+
if (!found) {
|
|
257
|
+
vector_insert<DType,IType>(s, pos, &ja, NULL, 1, true);
|
|
258
|
+
increment_ia_after<IType>(s, s->shape[0], i, 1);
|
|
259
|
+
++(s->ndnz);
|
|
260
|
+
|
|
261
|
+
if (ins_type == 'i') ++ija_next;
|
|
262
|
+
}
|
|
263
|
+
|
|
264
|
+
// can now set a left boundary for the next search
|
|
265
|
+
ija = pos + 1;
|
|
266
|
+
}
|
|
267
|
+
}
|
|
268
|
+
}
|
|
269
|
+
}
|
|
270
|
+
|
|
271
|
+
return s;
|
|
272
|
+
}
|
|
273
|
+
|
|
274
|
+
|
|
275
|
+
/*
|
|
276
|
+
* Empty the matrix by initializing the IJA vector and setting the diagonal to 0.
|
|
277
|
+
*
|
|
278
|
+
* Called when most YALE_STORAGE objects are created.
|
|
279
|
+
*/
|
|
280
|
+
template <typename DType, typename IType>
|
|
281
|
+
void init(YALE_STORAGE* s) {
|
|
282
|
+
IType IA_INIT = s->shape[0] + 1;
|
|
283
|
+
|
|
284
|
+
IType* ija = reinterpret_cast<IType*>(s->ija);
|
|
285
|
+
// clear out IJA vector
|
|
286
|
+
for (IType i = 0; i < IA_INIT; ++i) {
|
|
287
|
+
ija[i] = IA_INIT; // set initial values for IJA
|
|
288
|
+
}
|
|
289
|
+
|
|
290
|
+
clear_diagonal_and_zero<DType>(s);
|
|
291
|
+
}
|
|
292
|
+
|
|
293
|
+
size_t max_size(YALE_STORAGE* s) {
|
|
294
|
+
size_t result = s->shape[0]*s->shape[1] + 1;
|
|
295
|
+
if (s->shape[0] > s->shape[1])
|
|
296
|
+
result += s->shape[0] - s->shape[1];
|
|
297
|
+
|
|
298
|
+
return result;
|
|
299
|
+
}
|
|
300
|
+
///////////////
|
|
301
|
+
// Accessors //
|
|
302
|
+
///////////////
|
|
303
|
+
|
|
304
|
+
/*
|
|
305
|
+
* Returns a slice of YALE_STORAGE object by coppy
|
|
306
|
+
*
|
|
307
|
+
* Slicing-related.
|
|
308
|
+
*/
|
|
309
|
+
template <typename DType,typename IType>
|
|
310
|
+
void* get(YALE_STORAGE* storage, SLICE* slice) {
|
|
311
|
+
|
|
312
|
+
size_t *offset = slice->coords;
|
|
313
|
+
// Copy shape for yale construction
|
|
314
|
+
size_t* shape = ALLOC_N(size_t, 2);
|
|
315
|
+
shape[0] = slice->lengths[0];
|
|
316
|
+
shape[1] = slice->lengths[1];
|
|
317
|
+
|
|
318
|
+
IType *src_ija = reinterpret_cast<IType*>(storage->ija);
|
|
319
|
+
DType *src_a = reinterpret_cast<DType*>(storage->a);
|
|
320
|
+
|
|
321
|
+
// Calc ndnz
|
|
322
|
+
size_t ndnz = 0;
|
|
323
|
+
size_t i,j; // indexes of destination matrix
|
|
324
|
+
size_t k,l; // indexes of source matrix
|
|
325
|
+
for (i = 0; i < shape[0]; i++) {
|
|
326
|
+
k = i + offset[0];
|
|
327
|
+
for (j = 0; j < shape[1]; j++) {
|
|
328
|
+
l = j + offset[1];
|
|
329
|
+
|
|
330
|
+
if (j == i) continue;
|
|
331
|
+
|
|
332
|
+
if (k == l && src_a[k] != 0) ndnz++; // for diagonal element of source
|
|
333
|
+
else { // for non-diagonal element
|
|
334
|
+
for (size_t c = src_ija[k]; c < src_ija[k+1]; c++)
|
|
335
|
+
if (src_ija[c] == l) { ndnz++; break; }
|
|
336
|
+
}
|
|
337
|
+
|
|
338
|
+
}
|
|
339
|
+
}
|
|
340
|
+
|
|
341
|
+
size_t request_capacity = shape[0] + ndnz + 1;
|
|
342
|
+
YALE_STORAGE* ns = nm_yale_storage_create(storage->dtype, shape, 2, request_capacity);
|
|
343
|
+
|
|
344
|
+
if (ns->capacity < request_capacity)
|
|
345
|
+
rb_raise(nm_eStorageTypeError, "conversion failed; capacity of %ld requested, max allowable is %ld", request_capacity, ns->capacity);
|
|
346
|
+
|
|
347
|
+
// Initialize the A and IJA arrays
|
|
348
|
+
init<DType,IType>(ns);
|
|
349
|
+
IType *dst_ija = reinterpret_cast<IType*>(ns->ija);
|
|
350
|
+
DType *dst_a = reinterpret_cast<DType*>(ns->a);
|
|
351
|
+
|
|
352
|
+
size_t ija = shape[0] + 1;
|
|
353
|
+
DType val;
|
|
354
|
+
for (i = 0; i < shape[0]; ++i) {
|
|
355
|
+
k = i + offset[0];
|
|
356
|
+
for (j = 0; j < shape[1]; ++j) {
|
|
357
|
+
l = j + offset[1];
|
|
358
|
+
|
|
359
|
+
// Get value from source matrix
|
|
360
|
+
if (k == l) val = src_a[k];
|
|
361
|
+
else {
|
|
362
|
+
// copy non-diagonal element
|
|
363
|
+
for (size_t c = src_ija[k]; c < src_ija[k+1]; ++c) {
|
|
364
|
+
if (src_ija[c] == l) val = src_a[c];
|
|
365
|
+
}
|
|
366
|
+
}
|
|
367
|
+
|
|
368
|
+
// Set value to destination matrix
|
|
369
|
+
if (i == j) dst_a[i] = val;
|
|
370
|
+
else {
|
|
371
|
+
// copy non-diagonal element
|
|
372
|
+
dst_ija[ija] = j;
|
|
373
|
+
dst_a[ija] = val;
|
|
374
|
+
|
|
375
|
+
++ija;
|
|
376
|
+
for (size_t c = i + 1; c <= shape[0]; ++c) {
|
|
377
|
+
dst_ija[c] = ija;
|
|
378
|
+
}
|
|
379
|
+
}
|
|
380
|
+
}
|
|
381
|
+
}
|
|
382
|
+
|
|
383
|
+
dst_ija[shape[0]] = ija; // indicate the end of the last row
|
|
384
|
+
ns->ndnz = ndnz;
|
|
385
|
+
return ns;
|
|
386
|
+
}
|
|
387
|
+
/*
|
|
388
|
+
* Returns a pointer to the correct location in the A vector of a YALE_STORAGE object, given some set of coordinates
|
|
389
|
+
* (the coordinates are stored in slice).
|
|
390
|
+
*/
|
|
391
|
+
template <typename DType,typename IType>
|
|
392
|
+
void* ref(YALE_STORAGE* storage, SLICE* slice) {
|
|
393
|
+
size_t* coords = slice->coords;
|
|
394
|
+
|
|
395
|
+
if (!slice->single) rb_raise(rb_eNotImpError, "This type slicing not supported yet.");
|
|
396
|
+
|
|
397
|
+
DType* a = reinterpret_cast<DType*>(storage->a);
|
|
398
|
+
IType* ija = reinterpret_cast<IType*>(storage->ija);
|
|
399
|
+
|
|
400
|
+
if (coords[0] == coords[1])
|
|
401
|
+
return &(a[ coords[0] ]); // return diagonal entry
|
|
402
|
+
|
|
403
|
+
if (ija[coords[0]] == ija[coords[0]+1])
|
|
404
|
+
return &(a[ storage->shape[0] ]); // return zero pointer
|
|
405
|
+
|
|
406
|
+
// binary search for the column's location
|
|
407
|
+
int pos = binary_search<IType>(storage,
|
|
408
|
+
ija[coords[0]],
|
|
409
|
+
ija[coords[0]+1]-1,
|
|
410
|
+
coords[1]);
|
|
411
|
+
|
|
412
|
+
if (pos != -1 && ija[pos] == coords[1])
|
|
413
|
+
return &(a[pos]); // found exact value
|
|
414
|
+
|
|
415
|
+
return &(a[ storage->shape[0] ]); // return a pointer that happens to be zero
|
|
416
|
+
}
|
|
417
|
+
|
|
418
|
+
/*
|
|
419
|
+
* Attempt to set some cell in a YALE_STORAGE object. Must supply coordinates and a pointer to a value (which will be
|
|
420
|
+
* copied into the storage object).
|
|
421
|
+
*/
|
|
422
|
+
template <typename DType, typename IType>
|
|
423
|
+
char set(YALE_STORAGE* storage, SLICE* slice, void* value) {
|
|
424
|
+
DType* v = reinterpret_cast<DType*>(value);
|
|
425
|
+
size_t* coords = slice->coords;
|
|
426
|
+
|
|
427
|
+
bool found = false;
|
|
428
|
+
char ins_type;
|
|
429
|
+
|
|
430
|
+
if (coords[0] == coords[1]) {
|
|
431
|
+
reinterpret_cast<DType*>(storage->a)[coords[0]] = *v; // set diagonal
|
|
432
|
+
return 'r';
|
|
433
|
+
}
|
|
434
|
+
|
|
435
|
+
// Get IJA positions of the beginning and end of the row
|
|
436
|
+
if (reinterpret_cast<IType*>(storage->ija)[coords[0]] == reinterpret_cast<IType*>(storage->ija)[coords[0]+1]) {
|
|
437
|
+
// empty row
|
|
438
|
+
ins_type = vector_insert<DType,IType>(storage, reinterpret_cast<IType*>(storage->ija)[coords[0]], &(coords[1]), v, 1, false);
|
|
439
|
+
increment_ia_after<IType>(storage, storage->shape[0], coords[0], 1);
|
|
440
|
+
storage->ndnz++;
|
|
441
|
+
|
|
442
|
+
return ins_type;
|
|
443
|
+
}
|
|
444
|
+
|
|
445
|
+
// non-empty row. search for coords[1] in the IJA array, between ija and ija_next
|
|
446
|
+
// (including ija, not including ija_next)
|
|
447
|
+
//ija_size = get_size<IType>(storage);
|
|
448
|
+
|
|
449
|
+
// Do a binary search for the column
|
|
450
|
+
size_t pos = insert_search<IType>(storage,
|
|
451
|
+
reinterpret_cast<IType*>(storage->ija)[coords[0]],
|
|
452
|
+
reinterpret_cast<IType*>(storage->ija)[coords[0]+1]-1,
|
|
453
|
+
coords[1], &found);
|
|
454
|
+
|
|
455
|
+
if (found) { // replace
|
|
456
|
+
reinterpret_cast<IType*>(storage->ija)[pos] = coords[1];
|
|
457
|
+
reinterpret_cast<DType*>(storage->a)[pos] = *v;
|
|
458
|
+
return 'r';
|
|
459
|
+
}
|
|
460
|
+
|
|
461
|
+
ins_type = vector_insert<DType,IType>(storage, pos, &(coords[1]), v, 1, false);
|
|
462
|
+
increment_ia_after<IType>(storage, storage->shape[0], coords[0], 1);
|
|
463
|
+
storage->ndnz++;
|
|
464
|
+
|
|
465
|
+
return ins_type;
|
|
466
|
+
}
|
|
467
|
+
|
|
468
|
+
///////////
|
|
469
|
+
// Tests //
|
|
470
|
+
///////////
|
|
471
|
+
|
|
472
|
+
/*
|
|
473
|
+
* Yale eql? -- for whole-matrix comparison returning a single value.
|
|
474
|
+
*/
|
|
475
|
+
template <typename LDType, typename RDType, typename IType>
|
|
476
|
+
static bool eqeq(const YALE_STORAGE* left, const YALE_STORAGE* right) {
|
|
477
|
+
LDType* la = reinterpret_cast<LDType*>(left->a);
|
|
478
|
+
RDType* ra = reinterpret_cast<RDType*>(right->a);
|
|
479
|
+
|
|
480
|
+
// Compare the diagonals first.
|
|
481
|
+
for (size_t index = 0; index < left->shape[0]; ++index) {
|
|
482
|
+
if (la[index] != ra[index]) return false;
|
|
483
|
+
}
|
|
484
|
+
|
|
485
|
+
IType* lij = reinterpret_cast<IType*>(left->ija);
|
|
486
|
+
IType* rij = reinterpret_cast<IType*>(right->ija);
|
|
487
|
+
|
|
488
|
+
for (IType i = 0; i < left->shape[0]; ++i) {
|
|
489
|
+
|
|
490
|
+
// Get start and end positions of row
|
|
491
|
+
IType l_ija = lij[i],
|
|
492
|
+
l_ija_next = lij[i+1],
|
|
493
|
+
r_ija = rij[i],
|
|
494
|
+
r_ija_next = rij[i+1];
|
|
495
|
+
|
|
496
|
+
// Check to see if one row is empty and the other isn't.
|
|
497
|
+
if (ndrow_is_empty<LDType,IType>(left, l_ija, l_ija_next)) {
|
|
498
|
+
if (!ndrow_is_empty<RDType,IType>(right, r_ija, r_ija_next)) {
|
|
499
|
+
return false;
|
|
500
|
+
}
|
|
501
|
+
|
|
502
|
+
} else if (ndrow_is_empty<RDType,IType>(right, r_ija, r_ija_next)) {
|
|
503
|
+
// one is empty but the other isn't
|
|
504
|
+
return false;
|
|
505
|
+
|
|
506
|
+
} else if (!ndrow_eqeq_ndrow<LDType,RDType,IType>(left, right, l_ija, l_ija_next, r_ija, r_ija_next)) {
|
|
507
|
+
// Neither row is empty. Must compare the rows directly.
|
|
508
|
+
return false;
|
|
509
|
+
}
|
|
510
|
+
|
|
511
|
+
}
|
|
512
|
+
|
|
513
|
+
return true;
|
|
514
|
+
}
|
|
515
|
+
|
|
516
|
+
/*
|
|
517
|
+
* Are two non-diagonal rows the same? We already know.
|
|
518
|
+
*/
|
|
519
|
+
template <typename LDType, typename RDType, typename IType>
|
|
520
|
+
static bool ndrow_eqeq_ndrow(const YALE_STORAGE* l, const YALE_STORAGE* r, IType l_ija, const IType l_ija_next, IType r_ija, const IType r_ija_next) {
|
|
521
|
+
bool l_no_more = false, r_no_more = false;
|
|
522
|
+
|
|
523
|
+
IType *lij = reinterpret_cast<IType*>(l->ija),
|
|
524
|
+
*rij = reinterpret_cast<IType*>(r->ija);
|
|
525
|
+
|
|
526
|
+
LDType* la = reinterpret_cast<LDType*>(l->a);
|
|
527
|
+
RDType* ra = reinterpret_cast<RDType*>(r->a);
|
|
528
|
+
|
|
529
|
+
IType l_ja = lij[l_ija],
|
|
530
|
+
r_ja = rij[r_ija];
|
|
531
|
+
|
|
532
|
+
IType ja = std::min(l_ja, r_ja);
|
|
533
|
+
|
|
534
|
+
while (!(l_no_more && r_no_more)) {
|
|
535
|
+
if (l_ja == r_ja) {
|
|
536
|
+
|
|
537
|
+
if (ra[r_ija] != la[l_ija]) return false; // Direct comparison
|
|
538
|
+
|
|
539
|
+
++l_ija;
|
|
540
|
+
++r_ija;
|
|
541
|
+
|
|
542
|
+
if (l_ija < l_ija_next) {
|
|
543
|
+
l_ja = lij[l_ija];
|
|
544
|
+
|
|
545
|
+
} else {
|
|
546
|
+
l_no_more = true;
|
|
547
|
+
}
|
|
548
|
+
|
|
549
|
+
if (r_ija < r_ija_next) {
|
|
550
|
+
r_ja = rij[r_ija];
|
|
551
|
+
|
|
552
|
+
} else {
|
|
553
|
+
r_no_more = true;
|
|
554
|
+
}
|
|
555
|
+
|
|
556
|
+
ja = std::min(l_ja, r_ja);
|
|
557
|
+
|
|
558
|
+
} else if (l_no_more || ja < l_ja) {
|
|
559
|
+
|
|
560
|
+
if (ra[r_ija] != 0) return false;
|
|
561
|
+
|
|
562
|
+
++r_ija;
|
|
563
|
+
if (r_ija < r_ija_next) {
|
|
564
|
+
// get next column
|
|
565
|
+
r_ja = rij[r_ija];
|
|
566
|
+
ja = std::min(l_ja, r_ja);
|
|
567
|
+
|
|
568
|
+
} else {
|
|
569
|
+
l_no_more = true;
|
|
570
|
+
}
|
|
571
|
+
|
|
572
|
+
} else if (r_no_more || ja < r_ja) {
|
|
573
|
+
|
|
574
|
+
if (la[l_ija] != 0) return false;
|
|
575
|
+
|
|
576
|
+
++l_ija;
|
|
577
|
+
if (l_ija < l_ija_next) {
|
|
578
|
+
// get next column
|
|
579
|
+
l_ja = lij[l_ija];
|
|
580
|
+
ja = std::min(l_ja, r_ja);
|
|
581
|
+
} else {
|
|
582
|
+
l_no_more = true;
|
|
583
|
+
}
|
|
584
|
+
|
|
585
|
+
} else {
|
|
586
|
+
std::fprintf(stderr, "Unhandled in eqeq: l_ja=%d, r_ja=%d\n", (int)l_ja, (int)r_ja);
|
|
587
|
+
}
|
|
588
|
+
}
|
|
589
|
+
|
|
590
|
+
// every item matched
|
|
591
|
+
return true;
|
|
592
|
+
}
|
|
593
|
+
|
|
594
|
+
/*
|
|
595
|
+
* Is the non-diagonal portion of the row empty?
|
|
596
|
+
*/
|
|
597
|
+
template <typename DType, typename IType>
|
|
598
|
+
static bool ndrow_is_empty(const YALE_STORAGE* s, IType ija, const IType ija_next) {
|
|
599
|
+
if (ija == ija_next) return true;
|
|
600
|
+
|
|
601
|
+
DType* a = reinterpret_cast<DType*>(s->a);
|
|
602
|
+
|
|
603
|
+
// do all the entries = zero?
|
|
604
|
+
for (; ija < ija_next; ++ija) {
|
|
605
|
+
if (a[ija] != 0) return false;
|
|
606
|
+
}
|
|
607
|
+
|
|
608
|
+
return true;
|
|
609
|
+
}
|
|
610
|
+
|
|
611
|
+
//////////
|
|
612
|
+
// Math //
|
|
613
|
+
//////////
|
|
614
|
+
|
|
615
|
+
#define YALE_IA(s) (reinterpret_cast<IType*>(s->ija))
|
|
616
|
+
#define YALE_IJ(s) (reinterpret_cast<IType*>(s->ija) + s->shape[0] + 1)
|
|
617
|
+
#define YALE_COUNT(yale) (yale->ndnz + yale->shape[0])
|
|
618
|
+
|
|
619
|
+
template <typename nm::ewop_t op, typename IType, typename DType>
|
|
620
|
+
YALE_STORAGE* ew_op(const YALE_STORAGE* left, const YALE_STORAGE* right, dtype_t dtype) {
|
|
621
|
+
size_t init_capacity;
|
|
622
|
+
size_t* new_shape;
|
|
623
|
+
|
|
624
|
+
unsigned int da_index,
|
|
625
|
+
la_index,
|
|
626
|
+
ra_index,
|
|
627
|
+
|
|
628
|
+
a_index_offset,
|
|
629
|
+
|
|
630
|
+
la_row_max,
|
|
631
|
+
ra_row_max,
|
|
632
|
+
|
|
633
|
+
row_index;
|
|
634
|
+
|
|
635
|
+
DType tmp_result;
|
|
636
|
+
|
|
637
|
+
DType * la = reinterpret_cast<DType*> (left->a),
|
|
638
|
+
* ra = reinterpret_cast<DType*>(right->a),
|
|
639
|
+
* da;
|
|
640
|
+
|
|
641
|
+
YALE_STORAGE* dest;
|
|
642
|
+
|
|
643
|
+
new_shape = reinterpret_cast<size_t*>(calloc(2, sizeof(size_t)));
|
|
644
|
+
new_shape[0] = left->shape[0];
|
|
645
|
+
new_shape[1] = left->shape[1];
|
|
646
|
+
|
|
647
|
+
init_capacity = std::min(left->ndnz + right->ndnz + new_shape[0], new_shape[0] * new_shape[1]);
|
|
648
|
+
|
|
649
|
+
dest = nm_yale_storage_create(dtype, new_shape, 2, init_capacity);
|
|
650
|
+
da = reinterpret_cast<DType*>(dest->a);
|
|
651
|
+
|
|
652
|
+
// Calculate diagonal values.
|
|
653
|
+
for (da_index = 0; da_index < dest->shape[0]; ++da_index) {
|
|
654
|
+
da[da_index] = ew_op_switch<op, DType, DType>(la[da_index], ra[da_index]);
|
|
655
|
+
}
|
|
656
|
+
|
|
657
|
+
// Set the zero representation seperator.
|
|
658
|
+
da[da_index] = typeid(DType) == typeid(RubyObject) ? INT2FIX(0) : 0;
|
|
659
|
+
|
|
660
|
+
/*
|
|
661
|
+
* Calculate the offset between start of the A arrays and the non-diagonal
|
|
662
|
+
* entries.
|
|
663
|
+
*/
|
|
664
|
+
a_index_offset = dest->shape[0] + 1;
|
|
665
|
+
|
|
666
|
+
// Re-base the A arrays.
|
|
667
|
+
la = la + a_index_offset;
|
|
668
|
+
ra = ra + a_index_offset;
|
|
669
|
+
da = da + a_index_offset;
|
|
670
|
+
|
|
671
|
+
// Initialize our A array indices.
|
|
672
|
+
la_index = ra_index = da_index = 0;
|
|
673
|
+
|
|
674
|
+
// Calculate the non-diagonal values.
|
|
675
|
+
for (row_index = 0; row_index < dest->shape[0]; ++row_index) {
|
|
676
|
+
/*
|
|
677
|
+
* Each row.
|
|
678
|
+
*/
|
|
679
|
+
|
|
680
|
+
printf("Row %d\n", row_index);
|
|
681
|
+
|
|
682
|
+
// Get row bounds.
|
|
683
|
+
la_row_max = YALE_IA( left)[row_index + 1] - a_index_offset;
|
|
684
|
+
ra_row_max = YALE_IA(right)[row_index + 1] - a_index_offset;
|
|
685
|
+
|
|
686
|
+
printf("Left : Row Start: %d - Row End %d\n", la_index + a_index_offset, la_row_max + a_index_offset);
|
|
687
|
+
printf("Right : Row Start: %d - Row End %d\n", ra_index + a_index_offset, ra_row_max + a_index_offset);
|
|
688
|
+
|
|
689
|
+
/*
|
|
690
|
+
* Set this row's left bound (which is also the previous row's right
|
|
691
|
+
* bound).
|
|
692
|
+
*/
|
|
693
|
+
YALE_IA(dest)[row_index] = da_index + a_index_offset;
|
|
694
|
+
|
|
695
|
+
printf("Left bound of row %d in destination: %d\n", (int)row_index, (int)YALE_IA(dest)[row_index]);
|
|
696
|
+
|
|
697
|
+
// Iterate over non-diagonal entries in this row.
|
|
698
|
+
while (la_index < la_row_max and ra_index < ra_row_max) {
|
|
699
|
+
/*
|
|
700
|
+
* Elements are present on both the left- and right-hand side.
|
|
701
|
+
*/
|
|
702
|
+
|
|
703
|
+
printf("Marker 0\n");
|
|
704
|
+
|
|
705
|
+
if (YALE_IJ(left)[la_index] == YALE_IJ(right)[ra_index]) {
|
|
706
|
+
/*
|
|
707
|
+
* Current left- and right-hand values are in the same row and
|
|
708
|
+
* column.
|
|
709
|
+
*/
|
|
710
|
+
|
|
711
|
+
printf("Calculating value for [%d, %d].\n", (int)row_index, (int)YALE_IJ(left)[la_index]);
|
|
712
|
+
|
|
713
|
+
tmp_result = ew_op_switch<op, DType, DType>(la[la_index], ra[ra_index]);
|
|
714
|
+
|
|
715
|
+
if (tmp_result != 0) {
|
|
716
|
+
printf("Setting value for [%d, %d] at index %d in destination's A array.\n", (int)row_index, (int)YALE_IJ(left)[la_index], (int)(da_index + a_index_offset));
|
|
717
|
+
|
|
718
|
+
da[da_index] = tmp_result;
|
|
719
|
+
YALE_IJ(dest)[da_index] = YALE_IJ(left)[la_index];
|
|
720
|
+
|
|
721
|
+
++da_index;
|
|
722
|
+
|
|
723
|
+
} else {
|
|
724
|
+
printf("Result was 0. Skipping.\n");
|
|
725
|
+
}
|
|
726
|
+
|
|
727
|
+
++la_index;
|
|
728
|
+
++ra_index;
|
|
729
|
+
|
|
730
|
+
} else if (YALE_IJ(left)[la_index] < YALE_IJ(right)[ra_index]) {
|
|
731
|
+
/*
|
|
732
|
+
* The right-hand index is ahead of the left-hand index.
|
|
733
|
+
*/
|
|
734
|
+
|
|
735
|
+
if (op != EW_MUL) {
|
|
736
|
+
// If this is multiplion there is no point in doing the operation.
|
|
737
|
+
|
|
738
|
+
tmp_result = ew_op_switch<op, DType, DType>(la[la_index], typeid(DType) == typeid(RubyObject) ? INT2FIX(0) : 0);
|
|
739
|
+
|
|
740
|
+
printf("Setting value for [%d, %d].\n", (int)row_index, (int)YALE_IJ(left)[la_index]);
|
|
741
|
+
|
|
742
|
+
if (tmp_result != 0) {
|
|
743
|
+
da[da_index] = tmp_result;
|
|
744
|
+
YALE_IJ(dest)[da_index] = YALE_IJ(left)[la_index];
|
|
745
|
+
|
|
746
|
+
++da_index;
|
|
747
|
+
}
|
|
748
|
+
}
|
|
749
|
+
|
|
750
|
+
++la_index;
|
|
751
|
+
|
|
752
|
+
} else {
|
|
753
|
+
/*
|
|
754
|
+
* The left-hand index is ahead of the right-hand index.
|
|
755
|
+
*/
|
|
756
|
+
|
|
757
|
+
if (op != EW_MUL) {
|
|
758
|
+
// If this is multiplion there is no point in doing the operation.
|
|
759
|
+
|
|
760
|
+
tmp_result = ew_op_switch<op, DType, DType>(typeid(DType) == typeid(RubyObject) ? INT2FIX(0) : 0, ra[ra_index]);
|
|
761
|
+
|
|
762
|
+
printf("Setting value for [%d, %d].\n", (int)row_index, (int)YALE_IJ(right)[ra_index]);
|
|
763
|
+
|
|
764
|
+
if (tmp_result != 0) {
|
|
765
|
+
da[da_index] = tmp_result;
|
|
766
|
+
YALE_IJ(dest)[da_index] = YALE_IJ(right)[ra_index];
|
|
767
|
+
|
|
768
|
+
++da_index;
|
|
769
|
+
}
|
|
770
|
+
}
|
|
771
|
+
|
|
772
|
+
++ra_index;
|
|
773
|
+
}
|
|
774
|
+
}
|
|
775
|
+
|
|
776
|
+
if (op != EW_MUL) {
|
|
777
|
+
/*
|
|
778
|
+
* Process the remaining elements on the left- or right-hand side. One or
|
|
779
|
+
* the other, or neither, of the following loops may execute, but not
|
|
780
|
+
* both.
|
|
781
|
+
*
|
|
782
|
+
* If we are doing multiplication this is unnecessary as all remaining
|
|
783
|
+
* operations will produce a zero value.
|
|
784
|
+
*/
|
|
785
|
+
|
|
786
|
+
while (la_index < la_row_max) {
|
|
787
|
+
/*
|
|
788
|
+
* Process the remaining elements on the left-hand side.
|
|
789
|
+
*/
|
|
790
|
+
|
|
791
|
+
printf("Marker 1\n");
|
|
792
|
+
|
|
793
|
+
tmp_result = ew_op_switch<op, DType, DType>(la[la_index], typeid(DType) == typeid(RubyObject) ? INT2FIX(0) : 0);
|
|
794
|
+
|
|
795
|
+
printf("Setting value for [%d, %d].\n", (int)row_index, (int)YALE_IJ(left)[la_index]);
|
|
796
|
+
|
|
797
|
+
if (tmp_result != 0) {
|
|
798
|
+
da[da_index] = tmp_result;
|
|
799
|
+
YALE_IJ(dest)[da_index] = YALE_IJ(left)[la_index];
|
|
800
|
+
|
|
801
|
+
++da_index;
|
|
802
|
+
}
|
|
803
|
+
|
|
804
|
+
++la_index;
|
|
805
|
+
}
|
|
806
|
+
|
|
807
|
+
while (ra_index < ra_row_max) {
|
|
808
|
+
/*
|
|
809
|
+
* Process the remaining elements on the right-hand side.
|
|
810
|
+
*/
|
|
811
|
+
|
|
812
|
+
printf("Marker 2\n");
|
|
813
|
+
|
|
814
|
+
tmp_result = ew_op_switch<op, DType, DType>(typeid(DType) == typeid(RubyObject) ? INT2FIX(0) : 0, ra[ra_index]);
|
|
815
|
+
|
|
816
|
+
printf("Setting value for [%d, %d].\n", (int)row_index, (int)YALE_IJ(right)[ra_index]);
|
|
817
|
+
|
|
818
|
+
if (tmp_result != 0) {
|
|
819
|
+
da[da_index] = tmp_result;
|
|
820
|
+
YALE_IJ(dest)[da_index] = YALE_IJ(right)[ra_index];
|
|
821
|
+
|
|
822
|
+
++da_index;
|
|
823
|
+
}
|
|
824
|
+
|
|
825
|
+
++ra_index;
|
|
826
|
+
}
|
|
827
|
+
}
|
|
828
|
+
|
|
829
|
+
// Advance the row indices.
|
|
830
|
+
la_index = la_row_max;
|
|
831
|
+
ra_index = ra_row_max;
|
|
832
|
+
|
|
833
|
+
printf("End of row %d\n\n", row_index);
|
|
834
|
+
}
|
|
835
|
+
|
|
836
|
+
// Set the last row's right bound.
|
|
837
|
+
YALE_IA(dest)[row_index] = da_index + a_index_offset;
|
|
838
|
+
|
|
839
|
+
printf("Right bound of row %d in destination: %d\n", row_index - 1, da_index + a_index_offset);
|
|
840
|
+
|
|
841
|
+
// Set the number of non-diagonal non-zero entries in the destination matrix.
|
|
842
|
+
dest->ndnz = da_index;
|
|
843
|
+
|
|
844
|
+
printf("Number of non-diagonal non-zero entires: %ld\n\n", (unsigned long)(dest->ndnz));
|
|
845
|
+
|
|
846
|
+
// Set the capacity of the destination matrix.
|
|
847
|
+
dest->capacity = dest->shape[0] + dest->ndnz + 1;
|
|
848
|
+
|
|
849
|
+
// Resize the destination matrix.
|
|
850
|
+
dest->a = realloc(dest->a, sizeof(DType) * dest->capacity);
|
|
851
|
+
dest->ija = realloc(dest->ija, sizeof(IType) * dest->capacity);
|
|
852
|
+
|
|
853
|
+
return dest;
|
|
854
|
+
}
|
|
855
|
+
|
|
856
|
+
/////////////
|
|
857
|
+
// Utility //
|
|
858
|
+
/////////////
|
|
859
|
+
|
|
860
|
+
/*
|
|
861
|
+
* Binary search for returning stored values. Returns a non-negative position, or -1 for not found.
|
|
862
|
+
*/
|
|
863
|
+
template <typename IType>
|
|
864
|
+
int binary_search(YALE_STORAGE* s, IType left, IType right, IType key) {
|
|
865
|
+
|
|
866
|
+
if (left > right) return -1;
|
|
867
|
+
|
|
868
|
+
IType* ija = reinterpret_cast<IType*>(s->ija);
|
|
869
|
+
|
|
870
|
+
IType mid = (left + right)/2;
|
|
871
|
+
IType mid_j = ija[mid];
|
|
872
|
+
|
|
873
|
+
if (mid_j == key)
|
|
874
|
+
return mid;
|
|
875
|
+
|
|
876
|
+
else if (mid_j > key)
|
|
877
|
+
return binary_search<IType>(s, left, mid - 1, key);
|
|
878
|
+
|
|
879
|
+
else
|
|
880
|
+
return binary_search<IType>(s, mid + 1, right, key);
|
|
881
|
+
}
|
|
882
|
+
|
|
883
|
+
|
|
884
|
+
|
|
885
|
+
/*
|
|
886
|
+
* Resize yale storage vectors A and IJA in preparation for an insertion.
|
|
887
|
+
*/
|
|
888
|
+
template <typename DType, typename IType>
|
|
889
|
+
static char vector_insert_resize(YALE_STORAGE* s, size_t current_size, size_t pos, size_t* j, size_t n, bool struct_only) {
|
|
890
|
+
// Determine the new capacity for the IJA and A vectors.
|
|
891
|
+
size_t new_capacity = s->capacity * GROWTH_CONSTANT;
|
|
892
|
+
size_t max_capacity = max_size(s);
|
|
893
|
+
|
|
894
|
+
if (new_capacity > max_capacity) {
|
|
895
|
+
new_capacity = max_capacity;
|
|
896
|
+
|
|
897
|
+
if (current_size + n > max_capacity) rb_raise(rb_eNoMemError, "insertion size exceeded maximum yale matrix size");
|
|
898
|
+
}
|
|
899
|
+
|
|
900
|
+
if (new_capacity < current_size + n)
|
|
901
|
+
new_capacity = current_size + n;
|
|
902
|
+
|
|
903
|
+
// Allocate the new vectors.
|
|
904
|
+
IType* new_ija = ALLOC_N( IType, new_capacity );
|
|
905
|
+
NM_CHECK_ALLOC(new_ija);
|
|
906
|
+
|
|
907
|
+
DType* new_a = ALLOC_N( DType, new_capacity );
|
|
908
|
+
NM_CHECK_ALLOC(new_a);
|
|
909
|
+
|
|
910
|
+
IType* old_ija = reinterpret_cast<IType*>(s->ija);
|
|
911
|
+
DType* old_a = reinterpret_cast<DType*>(s->a);
|
|
912
|
+
|
|
913
|
+
// Copy all values prior to the insertion site to the new IJA and new A
|
|
914
|
+
if (struct_only) {
|
|
915
|
+
for (size_t i = 0; i < pos; ++i) {
|
|
916
|
+
new_ija[i] = old_ija[i];
|
|
917
|
+
}
|
|
918
|
+
} else {
|
|
919
|
+
for (size_t i = 0; i < pos; ++i) {
|
|
920
|
+
new_ija[i] = old_ija[i];
|
|
921
|
+
new_a[i] = old_a[i];
|
|
922
|
+
}
|
|
923
|
+
}
|
|
924
|
+
|
|
925
|
+
|
|
926
|
+
// Copy all values subsequent to the insertion site to the new IJA and new A, leaving room (size n) for insertion.
|
|
927
|
+
if (struct_only) {
|
|
928
|
+
for (size_t i = pos; i < current_size - pos + n - 1; ++i) {
|
|
929
|
+
new_ija[i+n] = old_ija[i];
|
|
930
|
+
}
|
|
931
|
+
} else {
|
|
932
|
+
for (size_t i = pos; i < current_size - pos + n - 1; ++i) {
|
|
933
|
+
new_ija[i+n] = old_ija[i];
|
|
934
|
+
new_a[i+n] = old_a[i];
|
|
935
|
+
}
|
|
936
|
+
}
|
|
937
|
+
|
|
938
|
+
s->capacity = new_capacity;
|
|
939
|
+
|
|
940
|
+
free(s->ija);
|
|
941
|
+
free(s->a);
|
|
942
|
+
|
|
943
|
+
s->ija = reinterpret_cast<void*>(new_ija);
|
|
944
|
+
s->a = reinterpret_cast<void*>(new_a);
|
|
945
|
+
|
|
946
|
+
return 'i';
|
|
947
|
+
}
|
|
948
|
+
|
|
949
|
+
/*
|
|
950
|
+
* Insert a value or contiguous values in the ija and a vectors (after ja and
|
|
951
|
+
* diag). Does not free anything; you are responsible!
|
|
952
|
+
*
|
|
953
|
+
* TODO: Improve this so it can handle non-contiguous element insertions
|
|
954
|
+
* efficiently. For now, we can just sort the elements in the row in
|
|
955
|
+
* question.)
|
|
956
|
+
*/
|
|
957
|
+
/*
 * Insert one or more contiguous (j, val) entries into the IJA and A vectors
 * at position pos (which must lie in the JA/LU region, i.e. pos >= shape[0]).
 *
 * s           - Yale storage to modify.
 * pos         - index into IJA/A at which insertion begins.
 * j           - array of n column indices to write into IJA.
 * val         - array of n values to write into A (ignored if struct_only).
 * n           - number of entries to insert.
 * struct_only - when true, only the structure (IJA) is updated.
 *
 * Returns 'i' on success; raises ArgError if pos falls inside the IA region.
 * Does not free j or val; the caller retains ownership of those arrays.
 */
template <typename DType, typename IType>
static char vector_insert(YALE_STORAGE* s, size_t pos, size_t* j, DType* val, size_t n, bool struct_only) {
  if (pos < s->shape[0]) {
    rb_raise(rb_eArgError, "vector insert pos is before beginning of ja; this should not happen");
  }

  // Current number of stored entries (IJA[shape[0]]).
  size_t size = get_size<IType>(s);

  IType* ija = reinterpret_cast<IType*>(s->ija);
  DType* a = reinterpret_cast<DType*>(s->a);

  if (size + n > s->capacity) {
    // Not enough room: grow the vectors, which also shifts the tail by n.
    vector_insert_resize<DType,IType>(s, size, pos, j, n, struct_only);

    // Need to get the new locations for ija and a.
    ija = reinterpret_cast<IType*>(s->ija);
    a = reinterpret_cast<DType*>(s->a);

  } else {
    /*
     * No resize required:
     * easy (but somewhat slow), just copy elements to the tail, starting at
     * the end, one element at a time.
     *
     * TODO: This can be made slightly more efficient, but only after the tests
     * are written.
     */

    // Shift the tail right by n. Iterating from the end guarantees no source
    // element is overwritten before it has been copied.
    if (struct_only) {
      for (size_t i = 0; i < size - pos; ++i) {
        ija[size+n-1-i] = ija[size-1-i];
      }
    } else {
      for (size_t i = 0; i < size - pos; ++i) {
        ija[size+n-1-i] = ija[size-1-i];
        a[size+n-1-i] = a[size-1-i];
      }
    }
  }

  // Now insert the new values.
  if (struct_only) {
    for (size_t i = 0; i < n; ++i) {
      ija[pos+i] = j[i];
    }
  } else {
    for (size_t i = 0; i < n; ++i) {
      ija[pos+i] = j[i];
      a[pos+i] = val[i];
    }
  }

  return 'i';
}
|
|
1011
|
+
|
|
1012
|
+
/*
|
|
1013
|
+
* If we add n items to row i, we need to increment ija[i+1] and onward.
|
|
1014
|
+
*/
|
|
1015
|
+
template <typename IType>
|
|
1016
|
+
static void increment_ia_after(YALE_STORAGE* s, IType ija_size, IType i, IType n) {
|
|
1017
|
+
IType* ija = reinterpret_cast<IType*>(s->ija);
|
|
1018
|
+
|
|
1019
|
+
++i;
|
|
1020
|
+
for (; i <= ija_size; ++i) {
|
|
1021
|
+
ija[i] += n;
|
|
1022
|
+
}
|
|
1023
|
+
}
|
|
1024
|
+
|
|
1025
|
+
/*
|
|
1026
|
+
* Binary search for returning insertion points.
|
|
1027
|
+
*/
|
|
1028
|
+
template <typename IType>
|
|
1029
|
+
static IType insert_search(YALE_STORAGE* s, IType left, IType right, IType key, bool* found) {
|
|
1030
|
+
|
|
1031
|
+
if (left > right) {
|
|
1032
|
+
*found = false;
|
|
1033
|
+
return left;
|
|
1034
|
+
}
|
|
1035
|
+
|
|
1036
|
+
IType* ija = reinterpret_cast<IType*>(s->ija);
|
|
1037
|
+
IType mid = (left + right)/2;
|
|
1038
|
+
IType mid_j = ija[mid];
|
|
1039
|
+
|
|
1040
|
+
if (mid_j == key) {
|
|
1041
|
+
*found = true;
|
|
1042
|
+
return mid;
|
|
1043
|
+
|
|
1044
|
+
} else if (mid_j > key) {
|
|
1045
|
+
return insert_search<IType>(s, left, mid-1, key, found);
|
|
1046
|
+
|
|
1047
|
+
} else {
|
|
1048
|
+
return insert_search<IType>(s, mid+1, right, key, found);
|
|
1049
|
+
}
|
|
1050
|
+
}
|
|
1051
|
+
|
|
1052
|
+
/////////////////////////
|
|
1053
|
+
// Copying and Casting //
|
|
1054
|
+
/////////////////////////
|
|
1055
|
+
|
|
1056
|
+
/*
|
|
1057
|
+
* Templated copy constructor for changing dtypes.
|
|
1058
|
+
*/
|
|
1059
|
+
template <typename LDType, typename RDType, typename IType>
|
|
1060
|
+
YALE_STORAGE* cast_copy(const YALE_STORAGE* rhs, dtype_t new_dtype) {
|
|
1061
|
+
|
|
1062
|
+
// Allocate a new structure
|
|
1063
|
+
size_t size = get_size<IType>(rhs);
|
|
1064
|
+
YALE_STORAGE* lhs = copy_alloc_struct<IType>(rhs, new_dtype, rhs->capacity, size);
|
|
1065
|
+
|
|
1066
|
+
if (rhs->dtype == new_dtype) { // FIXME: Test if this condition is actually faster; second condition should work just as well.
|
|
1067
|
+
|
|
1068
|
+
memcpy(lhs->a, rhs->a, size * DTYPE_SIZES[new_dtype]);
|
|
1069
|
+
|
|
1070
|
+
} else {
|
|
1071
|
+
|
|
1072
|
+
LDType* la = reinterpret_cast<LDType*>(lhs->a);
|
|
1073
|
+
RDType* ra = reinterpret_cast<RDType*>(rhs->a);
|
|
1074
|
+
|
|
1075
|
+
for (size_t index = 0; index < size; ++index) {
|
|
1076
|
+
la[index] = ra[index];
|
|
1077
|
+
}
|
|
1078
|
+
|
|
1079
|
+
}
|
|
1080
|
+
|
|
1081
|
+
return lhs;
|
|
1082
|
+
}
|
|
1083
|
+
|
|
1084
|
+
/*
|
|
1085
|
+
* Template access for getting the size of Yale storage.
|
|
1086
|
+
*/
|
|
1087
|
+
template <typename IType>
|
|
1088
|
+
static inline size_t get_size(const YALE_STORAGE* storage) {
|
|
1089
|
+
return static_cast<size_t>(reinterpret_cast<IType*>(storage->ija)[ storage->shape[0] ]);
|
|
1090
|
+
}
|
|
1091
|
+
|
|
1092
|
+
/*
|
|
1093
|
+
* Allocate for a copy or copy-cast operation, and copy the IJA portion of the
|
|
1094
|
+
* matrix (the structure).
|
|
1095
|
+
*/
|
|
1096
|
+
template <typename IType>
|
|
1097
|
+
static YALE_STORAGE* copy_alloc_struct(const YALE_STORAGE* rhs, const dtype_t new_dtype, const size_t new_capacity, const size_t new_size) {
|
|
1098
|
+
YALE_STORAGE* lhs = ALLOC( YALE_STORAGE );
|
|
1099
|
+
lhs->dim = rhs->dim;
|
|
1100
|
+
lhs->shape = ALLOC_N( size_t, lhs->dim );
|
|
1101
|
+
memcpy(lhs->shape, rhs->shape, lhs->dim * sizeof(size_t));
|
|
1102
|
+
lhs->itype = rhs->itype;
|
|
1103
|
+
lhs->capacity = new_capacity;
|
|
1104
|
+
lhs->dtype = new_dtype;
|
|
1105
|
+
lhs->ndnz = rhs->ndnz;
|
|
1106
|
+
|
|
1107
|
+
lhs->ija = ALLOC_N( IType, lhs->capacity );
|
|
1108
|
+
lhs->a = ALLOC_N( char, DTYPE_SIZES[new_dtype] * lhs->capacity );
|
|
1109
|
+
|
|
1110
|
+
// Now copy the contents -- but only within the boundaries set by the size. Leave
|
|
1111
|
+
// the rest uninitialized.
|
|
1112
|
+
for (size_t i = 0; i < get_size<IType>(rhs); ++i)
|
|
1113
|
+
reinterpret_cast<IType*>(lhs->ija)[i] = reinterpret_cast<IType*>(rhs->ija)[i]; // copy indices
|
|
1114
|
+
|
|
1115
|
+
return lhs;
|
|
1116
|
+
}
|
|
1117
|
+
|
|
1118
|
+
/*
 * Multiply two Yale matrices (already cast to a common dtype), producing a
 * new Yale matrix via the SMMP algorithm: a symbolic pass to build the
 * structure, a numeric pass to fill values, then a column sort.
 *
 * NOTE(review): the `vector` parameter is accepted but not read here —
 * presumably reserved for matrix-by-vector special-casing; confirm with
 * callers before relying on it.
 */
template <typename DType, typename IType>
static STORAGE* matrix_multiply(const STORAGE_PAIR& casted_storage, size_t* resulting_shape, bool vector) {
  YALE_STORAGE *left = (YALE_STORAGE*)(casted_storage.left),
    *right = (YALE_STORAGE*)(casted_storage.right);

  // We can safely get dtype from the casted matrices; post-condition of binary_storage_cast_alloc is that dtype is the
  // same for left and right.
  // int8_t dtype = left->dtype;

  // Create result storage. Initial capacity is a heuristic upper guess;
  // the symbolic pass determines the true structure.
  YALE_STORAGE* result = nm_yale_storage_create(left->dtype, resulting_shape, 2, left->capacity + right->capacity);
  init<DType,IType>(result);

  IType* ijl = reinterpret_cast<IType*>(left->ija);
  IType* ijr = reinterpret_cast<IType*>(right->ija);
  IType* ija = reinterpret_cast<IType*>(result->ija);

  // Symbolic multiplication step (build the structure)
  nm::math::symbmm<IType>(result->shape[0], result->shape[1], ijl, ijl, true, ijr, ijr, true, ija, true);

  // Numeric multiplication step (fill in the elements)
  nm::math::numbmm<DType,IType>(result->shape[0], result->shape[1],
                                ijl, ijl, reinterpret_cast<DType*>(left->a), true,
                                ijr, ijr, reinterpret_cast<DType*>(right->a), true,
                                ija, ija, reinterpret_cast<DType*>(result->a), true);

  // Sort the columns
  nm::math::smmp_sort_columns<DType,IType>(result->shape[0], ija, ija, reinterpret_cast<DType*>(result->a));

  return reinterpret_cast<STORAGE*>(result);
}
|
|
1149
|
+
|
|
1150
|
+
}} // end of namespace nm::yale_storage.
|
|
1151
|
+
|
|
1152
|
+
///////////////////
|
|
1153
|
+
// Ruby Bindings //
|
|
1154
|
+
///////////////////
|
|
1155
|
+
|
|
1156
|
+
/* These bindings are mostly only for debugging Yale. They are called from Init_nmatrix. */
|
|
1157
|
+
|
|
1158
|
+
extern "C" {
|
|
1159
|
+
|
|
1160
|
+
/*
 * Register the NMatrix::YaleFunctions mixin: Ruby-level accessors for the
 * raw Yale vectors (IJA, A, IA, JA, D, LU), the stored-entry count, and the
 * growth constant. These exist primarily for debugging Yale storage.
 */
void nm_init_yale_functions() {
  cNMatrix_YaleFunctions = rb_define_module_under(cNMatrix, "YaleFunctions");

  // Raw-vector accessors (each takes no arguments and returns a Ruby Array
  // or integer built from the storage internals).
  rb_define_method(cNMatrix_YaleFunctions, "yale_ija", (METHOD)nm_ija, 0);
  rb_define_method(cNMatrix_YaleFunctions, "yale_a", (METHOD)nm_a, 0);
  rb_define_method(cNMatrix_YaleFunctions, "yale_size", (METHOD)nm_size, 0);
  rb_define_method(cNMatrix_YaleFunctions, "yale_ia", (METHOD)nm_ia, 0);
  rb_define_method(cNMatrix_YaleFunctions, "yale_ja", (METHOD)nm_ja, 0);
  rb_define_method(cNMatrix_YaleFunctions, "yale_d", (METHOD)nm_d, 0);
  rb_define_method(cNMatrix_YaleFunctions, "yale_lu", (METHOD)nm_lu, 0);
  // Capacity growth factor used when resizing IJA/A.
  rb_define_const(cNMatrix_YaleFunctions, "YALE_GROWTH_CONSTANT", rb_float_new(nm::yale_storage::GROWTH_CONSTANT));
}
|
|
1172
|
+
|
|
1173
|
+
|
|
1174
|
+
/////////////////
|
|
1175
|
+
// C ACCESSORS //
|
|
1176
|
+
/////////////////
|
|
1177
|
+
|
|
1178
|
+
/*
|
|
1179
|
+
* C accessor for inserting some value in a matrix (or replacing an existing cell).
|
|
1180
|
+
*/
|
|
1181
|
+
/*
 * C accessor for inserting some value in a matrix (or replacing an existing cell).
 */
char nm_yale_storage_set(STORAGE* storage, SLICE* slice, void* v) {
  NAMED_LI_DTYPE_TEMPLATE_TABLE(ttable, nm::yale_storage::set, char, YALE_STORAGE* storage, SLICE* slice, void* value);

  YALE_STORAGE* s = (YALE_STORAGE*)storage;

  // Dispatch on the matrix's dtype and index type.
  return ttable[s->dtype][s->itype](s, slice, v);
}
|
|
1188
|
+
|
|
1189
|
+
/*
|
|
1190
|
+
* C accessor for yale_storage::get, which returns a slice of YALE_STORAGE object by coppy
|
|
1191
|
+
*
|
|
1192
|
+
* Slicing-related.
|
|
1193
|
+
*/
|
|
1194
|
+
void* nm_yale_storage_get(STORAGE* storage, SLICE* slice) {
|
|
1195
|
+
NAMED_LI_DTYPE_TEMPLATE_TABLE(ttable, nm::yale_storage::get, void*, YALE_STORAGE* storage, SLICE* slice);
|
|
1196
|
+
YALE_STORAGE* s = (YALE_STORAGE*)storage;
|
|
1197
|
+
|
|
1198
|
+
|
|
1199
|
+
YALE_STORAGE* casted_storage = (YALE_STORAGE*)storage;
|
|
1200
|
+
return ttable[casted_storage->dtype][casted_storage->itype](casted_storage, slice);
|
|
1201
|
+
}
|
|
1202
|
+
|
|
1203
|
+
/*
|
|
1204
|
+
* C accessor for yale_storage::ref, which returns a pointer to the correct location in a YALE_STORAGE object
|
|
1205
|
+
* for some set of coordinates.
|
|
1206
|
+
*/
|
|
1207
|
+
/*
 * C accessor for yale_storage::ref, which returns a pointer to the correct
 * location in a YALE_STORAGE object for some set of coordinates.
 */
void* nm_yale_storage_ref(STORAGE* storage, SLICE* slice) {
  NAMED_LI_DTYPE_TEMPLATE_TABLE(ttable, nm::yale_storage::ref, void*, YALE_STORAGE* storage, SLICE* slice);

  YALE_STORAGE* s = (YALE_STORAGE*)storage;

  // Dispatch on the matrix's dtype and index type.
  return ttable[s->dtype][s->itype](s, slice);
}
|
|
1213
|
+
|
|
1214
|
+
/*
|
|
1215
|
+
* C accessor for determining whether two YALE_STORAGE objects have the same contents.
|
|
1216
|
+
*
|
|
1217
|
+
* FIXME: Is this for element-wise or whole-matrix equality?
|
|
1218
|
+
*/
|
|
1219
|
+
bool nm_yale_storage_eqeq(const STORAGE* left, const STORAGE* right) {
|
|
1220
|
+
NAMED_LRI_DTYPE_TEMPLATE_TABLE(ttable, nm::yale_storage::eqeq, bool, const YALE_STORAGE* left, const YALE_STORAGE* right);
|
|
1221
|
+
|
|
1222
|
+
const YALE_STORAGE* casted_left = reinterpret_cast<const YALE_STORAGE*>(left);
|
|
1223
|
+
|
|
1224
|
+
return ttable[casted_left->dtype][right->dtype][casted_left->itype](casted_left, (const YALE_STORAGE*)right);
|
|
1225
|
+
}
|
|
1226
|
+
|
|
1227
|
+
/*
|
|
1228
|
+
* Copy constructor for changing dtypes. (C accessor)
|
|
1229
|
+
*/
|
|
1230
|
+
/*
 * Copy constructor for changing dtypes. (C accessor)
 */
STORAGE* nm_yale_storage_cast_copy(const STORAGE* rhs, dtype_t new_dtype) {
  NAMED_LRI_DTYPE_TEMPLATE_TABLE(ttable, nm::yale_storage::cast_copy, YALE_STORAGE*, const YALE_STORAGE* rhs, dtype_t new_dtype);

  const YALE_STORAGE* src = reinterpret_cast<const YALE_STORAGE*>(rhs);

  // Dispatch on target dtype, source dtype, and source index type.
  return (STORAGE*)ttable[new_dtype][src->dtype][src->itype](src, new_dtype);
}
|
|
1237
|
+
|
|
1238
|
+
/*
|
|
1239
|
+
* Returns size of Yale storage as a size_t (no matter what the itype is). (C accessor)
|
|
1240
|
+
*/
|
|
1241
|
+
/*
 * Returns size of Yale storage as a size_t (no matter what the itype is). (C accessor)
 */
inline size_t nm_yale_storage_get_size(const YALE_STORAGE* storage) {
  NAMED_ITYPE_TEMPLATE_TABLE(sizes, nm::yale_storage::get_size, size_t, const YALE_STORAGE* storage);

  return sizes[storage->itype](storage);
}
|
|
1246
|
+
|
|
1247
|
+
/*
|
|
1248
|
+
* C accessor for allocating a yale storage object for cast-copying. Copies the IJA vector, does not copy the A vector.
|
|
1249
|
+
*/
|
|
1250
|
+
/*
 * C accessor for allocating a yale storage object for cast-copying. Copies the
 * IJA vector, does not copy the A vector.
 */
static YALE_STORAGE* nm_copy_alloc_struct(const YALE_STORAGE* rhs, const dtype_t new_dtype, const size_t new_capacity, const size_t new_size) {
  NAMED_ITYPE_TEMPLATE_TABLE(allocators, nm::yale_storage::copy_alloc_struct, YALE_STORAGE*, const YALE_STORAGE* rhs, const dtype_t new_dtype, const size_t new_capacity, const size_t new_size);

  return allocators[rhs->itype](rhs, new_dtype, new_capacity, new_size);
}
|
|
1255
|
+
|
|
1256
|
+
/*
|
|
1257
|
+
* Transposing copy constructor.
|
|
1258
|
+
*/
|
|
1259
|
+
/*
 * Transposing copy constructor.
 *
 * Builds a new Yale matrix with swapped shape and fills it by dispatching to
 * nm::math::transpose_yale on the source's dtype and itype. The returned
 * storage is freshly allocated; the caller owns it.
 */
STORAGE* nm_yale_storage_copy_transposed(const STORAGE* rhs_base) {
  YALE_STORAGE* rhs = (YALE_STORAGE*)rhs_base;

  // Transposed shape: rows and columns exchanged. Ownership of this array
  // passes to the new storage.
  size_t* shape = ALLOC_N(size_t, 2);
  shape[0] = rhs->shape[1];
  shape[1] = rhs->shape[0];

  size_t size = nm_yale_storage_get_size(rhs);

  YALE_STORAGE* lhs = nm_yale_storage_create(rhs->dtype, shape, 2, size);
  nm_yale_storage_init(lhs);

  NAMED_LI_DTYPE_TEMPLATE_TABLE(transp, nm::math::transpose_yale, void, const size_t n, const size_t m, const void* ia_, const void* ja_, const void* a_, const bool diaga, void* ib_, void* jb_, void* b_, const bool move);

  // diaga/move flags are both true: the diagonal is stored separately and the
  // values are carried over into the transpose.
  transp[lhs->dtype][lhs->itype](rhs->shape[0], rhs->shape[1], rhs->ija, rhs->ija, rhs->a, true, lhs->ija, lhs->ija, lhs->a, true);

  return (STORAGE*)lhs;
}
|
|
1277
|
+
|
|
1278
|
+
/*
|
|
1279
|
+
* C accessor for multiplying two YALE_STORAGE matrices, which have already been casted to the same dtype.
|
|
1280
|
+
*
|
|
1281
|
+
* FIXME: What happens if the two matrices have different itypes?
|
|
1282
|
+
*/
|
|
1283
|
+
/*
 * C accessor for multiplying two YALE_STORAGE matrices, which have already been casted to the same dtype.
 *
 * FIXME: What happens if the two matrices have different itypes?
 */
STORAGE* nm_yale_storage_matrix_multiply(const STORAGE_PAIR& casted_storage, size_t* resulting_shape, bool vector) {
  // The macro declares the dispatch table `ttable` in this scope.
  LI_DTYPE_TEMPLATE_TABLE(nm::yale_storage::matrix_multiply, STORAGE*, const STORAGE_PAIR& casted_storage, size_t* resulting_shape, bool vector);

  // Dtype/itype are taken from the left operand (post-cast they match).
  YALE_STORAGE* storage_access = (YALE_STORAGE*)(casted_storage.left);

  return ttable[storage_access->dtype][storage_access->itype](casted_storage, resulting_shape, vector);
}
|
|
1290
|
+
|
|
1291
|
+
/*
|
|
1292
|
+
* Documentation goes here.
|
|
1293
|
+
*/
|
|
1294
|
+
STORAGE* nm_yale_storage_ew_op(nm::ewop_t op, const STORAGE* left, const STORAGE* right) {
|
|
1295
|
+
OP_ITYPE_DTYPE_TEMPLATE_TABLE(nm::yale_storage::ew_op, YALE_STORAGE*, const YALE_STORAGE*, const YALE_STORAGE*, dtype_t);
|
|
1296
|
+
|
|
1297
|
+
YALE_STORAGE* new_l = NULL, * new_r = NULL;
|
|
1298
|
+
YALE_STORAGE* result;
|
|
1299
|
+
|
|
1300
|
+
const YALE_STORAGE* casted_l, * casted_r;
|
|
1301
|
+
|
|
1302
|
+
dtype_t new_dtype;
|
|
1303
|
+
|
|
1304
|
+
if (left->dtype != right->dtype) {
|
|
1305
|
+
|
|
1306
|
+
new_dtype = Upcast[left->dtype][right->dtype];
|
|
1307
|
+
|
|
1308
|
+
if (left->dtype != new_dtype) {
|
|
1309
|
+
new_l = reinterpret_cast<YALE_STORAGE*>(nm_yale_storage_cast_copy( left, new_dtype));
|
|
1310
|
+
}
|
|
1311
|
+
|
|
1312
|
+
if (right->dtype != new_dtype) {
|
|
1313
|
+
new_r = reinterpret_cast<YALE_STORAGE*>(nm_yale_storage_cast_copy(right, new_dtype));
|
|
1314
|
+
}
|
|
1315
|
+
|
|
1316
|
+
if (static_cast<uint8_t>(op) < nm::NUM_NONCOMP_EWOPS) {
|
|
1317
|
+
result = ttable[op][new_l->itype][new_dtype]( left->dtype == new_dtype ?
|
|
1318
|
+
reinterpret_cast<const YALE_STORAGE*>( left) :
|
|
1319
|
+
reinterpret_cast<const YALE_STORAGE*>(new_l),
|
|
1320
|
+
|
|
1321
|
+
right->dtype == new_dtype ?
|
|
1322
|
+
reinterpret_cast<const YALE_STORAGE*>(right) :
|
|
1323
|
+
reinterpret_cast<const YALE_STORAGE*>(new_r),
|
|
1324
|
+
|
|
1325
|
+
new_dtype);
|
|
1326
|
+
|
|
1327
|
+
} else {
|
|
1328
|
+
rb_raise(rb_eNotImpError, "Elementwise comparison is not yet implemented for the Yale storage class.");
|
|
1329
|
+
}
|
|
1330
|
+
|
|
1331
|
+
if (new_l != NULL) {
|
|
1332
|
+
nm_yale_storage_delete(new_l);
|
|
1333
|
+
}
|
|
1334
|
+
|
|
1335
|
+
if (new_r != NULL) {
|
|
1336
|
+
nm_yale_storage_delete(new_r);
|
|
1337
|
+
}
|
|
1338
|
+
|
|
1339
|
+
return result;
|
|
1340
|
+
|
|
1341
|
+
} else {
|
|
1342
|
+
|
|
1343
|
+
casted_l = reinterpret_cast<const YALE_STORAGE*>( left);
|
|
1344
|
+
casted_r = reinterpret_cast<const YALE_STORAGE*>(right);
|
|
1345
|
+
|
|
1346
|
+
if (static_cast<uint8_t>(op) < nm::NUM_NONCOMP_EWOPS) {
|
|
1347
|
+
|
|
1348
|
+
return ttable[op][casted_l->itype][casted_l->dtype](casted_l, casted_r, casted_l->dtype);
|
|
1349
|
+
|
|
1350
|
+
} else {
|
|
1351
|
+
rb_raise(rb_eNotImpError, "Elementwise comparison is not yet implemented for the Yale storage class.");
|
|
1352
|
+
}
|
|
1353
|
+
}
|
|
1354
|
+
}
|
|
1355
|
+
|
|
1356
|
+
///////////////
|
|
1357
|
+
// Lifecycle //
|
|
1358
|
+
///////////////
|
|
1359
|
+
|
|
1360
|
+
/*
|
|
1361
|
+
* C accessor function for creating a YALE_STORAGE object. Prior to calling this function, you MUST
|
|
1362
|
+
* allocate shape (should be size_t * 2) -- don't use use a regular size_t array!
|
|
1363
|
+
*
|
|
1364
|
+
* For this type, dim must always be 2. The final argument is the initial capacity with which to
|
|
1365
|
+
* create the storage.
|
|
1366
|
+
*/
|
|
1367
|
+
|
|
1368
|
+
/*
 * Create a YALE_STORAGE object (C accessor).
 *
 * dtype         - element dtype.
 * shape         - ALLOC'd size_t[2]; ownership passes to the new storage.
 * dim           - must be 2; anything else raises NotImplementedError.
 * init_capacity - requested capacity, clamped to [minimum, maximum] for the
 *                 given shape.
 *
 * The IJA and A vectors are allocated but not initialized; call
 * nm_yale_storage_init before use.
 */
YALE_STORAGE* nm_yale_storage_create(dtype_t dtype, size_t* shape, size_t dim, size_t init_capacity) {
  YALE_STORAGE* s;
  size_t max_capacity;

  // FIXME: This error should be handled in the nmatrix.c file.
  if (dim != 2) {
    rb_raise(rb_eNotImpError, "Can only support 2D matrices");
  }

  s = alloc(dtype, shape, dim);
  max_capacity = nm::yale_storage::max_size(s);

  // Set matrix capacity (and ensure its validity)
  if (init_capacity < NM_YALE_MINIMUM(s)) {
    // Too small to hold even the IA vector plus one entry: clamp up.
    s->capacity = NM_YALE_MINIMUM(s);

  } else if (init_capacity > max_capacity) {
    // Don't allow storage to be created larger than necessary
    s->capacity = max_capacity;

  } else {
    s->capacity = init_capacity;

  }

  // Raw byte allocations sized by the runtime itype/dtype widths.
  s->ija = ALLOC_N( char, ITYPE_SIZES[s->itype] * s->capacity );
  s->a = ALLOC_N( char, DTYPE_SIZES[s->dtype] * s->capacity );

  return s;
}
|
|
1398
|
+
|
|
1399
|
+
/*
|
|
1400
|
+
* Destructor for yale storage (C-accessible).
|
|
1401
|
+
*/
|
|
1402
|
+
/*
 * Destructor for yale storage (C-accessible).
 *
 * Frees the shape array, both vectors, and the struct itself. Safe to call
 * with NULL.
 */
void nm_yale_storage_delete(STORAGE* s) {
  if (!s) return;

  YALE_STORAGE* storage = (YALE_STORAGE*)s;

  free(storage->shape);
  free(storage->ija);
  free(storage->a);
  free(storage);
}
|
|
1411
|
+
|
|
1412
|
+
/*
|
|
1413
|
+
* C accessor for yale_storage::init, a templated function.
|
|
1414
|
+
*
|
|
1415
|
+
* Initializes the IJA vector of the YALE_STORAGE matrix.
|
|
1416
|
+
*/
|
|
1417
|
+
/*
 * C accessor for yale_storage::init, a templated function.
 *
 * Initializes the IJA vector of the YALE_STORAGE matrix.
 */
void nm_yale_storage_init(YALE_STORAGE* s) {
  NAMED_LI_DTYPE_TEMPLATE_TABLE(initters, nm::yale_storage::init, void, YALE_STORAGE* s);

  initters[s->dtype][s->itype](s);
}
|
|
1422
|
+
|
|
1423
|
+
/*
|
|
1424
|
+
* Ruby GC mark function for YALE_STORAGE. C accessible.
|
|
1425
|
+
*/
|
|
1426
|
+
void nm_yale_storage_mark(void* storage_base) {
|
|
1427
|
+
YALE_STORAGE* storage = (YALE_STORAGE*)storage_base;
|
|
1428
|
+
size_t i;
|
|
1429
|
+
|
|
1430
|
+
if (storage && storage->dtype == RUBYOBJ) {
|
|
1431
|
+
for (i = storage->capacity; i-- > 0;) {
|
|
1432
|
+
rb_gc_mark(*((VALUE*)((char*)(storage->a) + i*DTYPE_SIZES[RUBYOBJ])));
|
|
1433
|
+
}
|
|
1434
|
+
}
|
|
1435
|
+
}
|
|
1436
|
+
|
|
1437
|
+
/*
|
|
1438
|
+
* Allocates and initializes the basic struct (but not the IJA or A vectors).
|
|
1439
|
+
*/
|
|
1440
|
+
/*
 * Allocates and initializes the basic struct (but not the IJA or A vectors).
 *
 * Takes ownership of `shape`; the itype is derived from the shape's extent.
 */
static YALE_STORAGE* alloc(dtype_t dtype, size_t* shape, size_t dim) {
  YALE_STORAGE* storage = ALLOC( YALE_STORAGE );

  storage->dtype = dtype;
  storage->shape = shape;
  storage->dim   = dim;
  storage->ndnz  = 0;
  storage->itype = nm_yale_storage_itype_by_shape(shape);

  return storage;
}
|
|
1453
|
+
|
|
1454
|
+
/*
 * Build a new-Yale YALE_STORAGE from old-Yale IA/JA/A arrays (C accessor).
 *
 * dtype      - target element dtype.
 * shape      - size_t[2] matrix shape.
 * ia, ja, a  - old-Yale vectors, with elements of from_dtype.
 * from_dtype - dtype of the incoming arrays.
 */
YALE_STORAGE* nm_yale_storage_create_from_old_yale(dtype_t dtype, size_t* shape, void* ia, void* ja, void* a, dtype_t from_dtype) {

  NAMED_LRI_DTYPE_TEMPLATE_TABLE(ttable, nm::yale_storage::create_from_old_yale, YALE_STORAGE*, dtype_t dtype, size_t* shape, void* r_ia, void* r_ja, void* r_a);

  // With C++ templates, we don't want to have a 4-parameter template. That would be LDType, RDType, LIType, RIType.
  // We can prevent that by copying ia and ja into the correct itype (if necessary) before passing them to the yale
  // copy constructor.
  itype_t to_itype = nm_yale_storage_itype_by_shape(shape);

  return ttable[dtype][from_dtype][to_itype](dtype, shape, ia, ja, a);

}
|
|
1466
|
+
|
|
1467
|
+
//////////////////////////////////////////////
|
|
1468
|
+
// YALE-SPECIFIC FUNCTIONS (RUBY ACCESSORS) //
|
|
1469
|
+
//////////////////////////////////////////////
|
|
1470
|
+
|
|
1471
|
+
/*
|
|
1472
|
+
* Get the size of a Yale matrix (the number of elements actually stored).
|
|
1473
|
+
*
|
|
1474
|
+
* For capacity (the maximum number of elements that can be stored without a resize), use capacity instead.
|
|
1475
|
+
*/
|
|
1476
|
+
/*
 * Get the size of a Yale matrix (the number of elements actually stored).
 *
 * For capacity (the maximum number of elements that can be stored without a resize), use capacity instead.
 */
static VALUE nm_size(VALUE self) {
  YALE_STORAGE* s = (YALE_STORAGE*)NM_STORAGE(self);

  // The stored-entry count is the final IA bound, at IJA[shape[0]].
  char* count_loc = (char*)(s->ija) + ITYPE_SIZES[s->itype] * (s->shape[0]);

  return rubyobj_from_cval_by_itype(count_loc, s->itype).rval;
}
|
|
1481
|
+
|
|
1482
|
+
|
|
1483
|
+
/*
|
|
1484
|
+
* Get the A array of a Yale matrix (which stores the diagonal and the LU portions of the matrix).
|
|
1485
|
+
*/
|
|
1486
|
+
/*
 * Get the A array of a Yale matrix (which stores the diagonal and the LU portions of the matrix).
 *
 * Returns a Ruby Array of the stored values, padded with nil up to the
 * storage capacity so the full allocated A vector is visible.
 */
static VALUE nm_a(VALUE self) {
  YALE_STORAGE* s = NM_STORAGE_YALE(self);

  size_t size = nm_yale_storage_get_size(s);
  // Stack-allocated scratch array of VALUEs for rb_ary_new4.
  VALUE* vals = ALLOCA_N(VALUE, size);

  for (size_t i = 0; i < size; ++i) {
    vals[i] = rubyobj_from_cval((char*)(s->a) + DTYPE_SIZES[s->dtype]*i, s->dtype).rval;
  }
  VALUE ary = rb_ary_new4(size, vals);

  // Pad unused capacity slots with nil.
  for (size_t i = size; i < s->capacity; ++i)
    rb_ary_push(ary, Qnil);

  return ary;
}
|
|
1502
|
+
|
|
1503
|
+
|
|
1504
|
+
/*
|
|
1505
|
+
* Get the diagonal ("D") portion of the A array of a Yale matrix.
|
|
1506
|
+
*/
|
|
1507
|
+
/*
 * Get the diagonal ("D") portion of the A array of a Yale matrix.
 */
static VALUE nm_d(VALUE self) {
  YALE_STORAGE* s = NM_STORAGE_YALE(self);
  size_t n = s->shape[0];

  // The first shape[0] slots of A hold the diagonal.
  VALUE* vals = ALLOCA_N(VALUE, n);

  for (size_t i = 0; i < n; ++i) {
    vals[i] = rubyobj_from_cval((char*)(s->a) + DTYPE_SIZES[s->dtype]*i, s->dtype).rval;
  }

  return rb_ary_new4(n, vals);
}
|
|
1517
|
+
|
|
1518
|
+
/*
|
|
1519
|
+
* Get the non-diagonal ("LU") portion of the A array of a Yale matrix.
|
|
1520
|
+
*/
|
|
1521
|
+
static VALUE nm_lu(VALUE self) {
|
|
1522
|
+
YALE_STORAGE* s = NM_STORAGE_YALE(self);
|
|
1523
|
+
|
|
1524
|
+
size_t size = nm_yale_storage_get_size(s);
|
|
1525
|
+
|
|
1526
|
+
VALUE* vals = ALLOCA_N(VALUE, size - s->shape[0] - 1);
|
|
1527
|
+
|
|
1528
|
+
for (size_t i = 0; i < size - s->shape[0] - 1; ++i) {
|
|
1529
|
+
vals[i] = rubyobj_from_cval((char*)(s->a) + DTYPE_SIZES[s->dtype]*(s->shape[0] + 1 + i), s->dtype).rval;
|
|
1530
|
+
}
|
|
1531
|
+
|
|
1532
|
+
VALUE ary = rb_ary_new4(size - s->shape[0] - 1, vals);
|
|
1533
|
+
|
|
1534
|
+
for (size_t i = size; i < s->capacity; ++i)
|
|
1535
|
+
rb_ary_push(ary, Qnil);
|
|
1536
|
+
|
|
1537
|
+
return ary;
|
|
1538
|
+
}
|
|
1539
|
+
|
|
1540
|
+
/*
|
|
1541
|
+
* Get the IA portion of the IJA array of a Yale matrix. This gives the start and end positions of rows in the
|
|
1542
|
+
* JA and LU portions of the IJA and A arrays, respectively.
|
|
1543
|
+
*/
|
|
1544
|
+
static VALUE nm_ia(VALUE self) {
|
|
1545
|
+
YALE_STORAGE* s = NM_STORAGE_YALE(self);
|
|
1546
|
+
|
|
1547
|
+
VALUE* vals = ALLOCA_N(VALUE, s->shape[0] + 1);
|
|
1548
|
+
|
|
1549
|
+
for (size_t i = 0; i < s->shape[0] + 1; ++i) {
|
|
1550
|
+
vals[i] = rubyobj_from_cval_by_itype((char*)(s->ija) + ITYPE_SIZES[s->itype]*i, s->itype).rval;
|
|
1551
|
+
}
|
|
1552
|
+
|
|
1553
|
+
return rb_ary_new4(s->shape[0]+1, vals);
|
|
1554
|
+
}
|
|
1555
|
+
|
|
1556
|
+
/*
|
|
1557
|
+
* Get the JA portion of the IJA array of a Yale matrix. This gives the column indices for entries in corresponding
|
|
1558
|
+
* positions in the LU portion of the A array.
|
|
1559
|
+
*/
|
|
1560
|
+
static VALUE nm_ja(VALUE self) {
|
|
1561
|
+
YALE_STORAGE* s = NM_STORAGE_YALE(self);
|
|
1562
|
+
|
|
1563
|
+
size_t size = nm_yale_storage_get_size(s);
|
|
1564
|
+
|
|
1565
|
+
VALUE* vals = ALLOCA_N(VALUE, size - s->shape[0] - 1);
|
|
1566
|
+
|
|
1567
|
+
for (size_t i = 0; i < size - s->shape[0] - 1; ++i) {
|
|
1568
|
+
vals[i] = rubyobj_from_cval_by_itype((char*)(s->ija) + ITYPE_SIZES[s->itype]*(s->shape[0] + 1 + i), s->itype).rval;
|
|
1569
|
+
}
|
|
1570
|
+
|
|
1571
|
+
VALUE ary = rb_ary_new4(size - s->shape[0] - 1, vals);
|
|
1572
|
+
|
|
1573
|
+
for (size_t i = size; i < s->capacity; ++i)
|
|
1574
|
+
rb_ary_push(ary, Qnil);
|
|
1575
|
+
|
|
1576
|
+
return ary;
|
|
1577
|
+
}
|
|
1578
|
+
|
|
1579
|
+
/*
|
|
1580
|
+
* Get the IJA array of a Yale matrix.
|
|
1581
|
+
*/
|
|
1582
|
+
static VALUE nm_ija(VALUE self) {
|
|
1583
|
+
YALE_STORAGE* s = NM_STORAGE_YALE(self);
|
|
1584
|
+
|
|
1585
|
+
size_t size = nm_yale_storage_get_size(s);
|
|
1586
|
+
|
|
1587
|
+
VALUE* vals = ALLOCA_N(VALUE, size);
|
|
1588
|
+
|
|
1589
|
+
for (size_t i = 0; i < size; ++i) {
|
|
1590
|
+
vals[i] = rubyobj_from_cval_by_itype((char*)(s->ija) + ITYPE_SIZES[s->itype]*i, s->itype).rval;
|
|
1591
|
+
}
|
|
1592
|
+
|
|
1593
|
+
VALUE ary = rb_ary_new4(size, vals);
|
|
1594
|
+
|
|
1595
|
+
for (size_t i = size; i < s->capacity; ++i)
|
|
1596
|
+
rb_ary_push(ary, Qnil);
|
|
1597
|
+
|
|
1598
|
+
return ary;
|
|
1599
|
+
}
|
|
1600
|
+
|
|
1601
|
+
} // end of extern "C" block
|