nmatrix 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. data/.autotest +23 -0
  2. data/.gemtest +0 -0
  3. data/Gemfile +7 -0
  4. data/History.txt +6 -0
  5. data/LICENSE.txt +21 -0
  6. data/Manifest.txt +51 -0
  7. data/README.rdoc +63 -0
  8. data/Rakefile +154 -0
  9. data/ext/nmatrix/cblas.c +150 -0
  10. data/ext/nmatrix/dense.c +307 -0
  11. data/ext/nmatrix/dense/blas_header.template.c +52 -0
  12. data/ext/nmatrix/dense/elementwise.template.c +107 -0
  13. data/ext/nmatrix/dense/gemm.template.c +159 -0
  14. data/ext/nmatrix/dense/gemv.template.c +130 -0
  15. data/ext/nmatrix/dense/rationalmath.template.c +68 -0
  16. data/ext/nmatrix/depend +18 -0
  17. data/ext/nmatrix/extconf.rb +143 -0
  18. data/ext/nmatrix/generator.rb +594 -0
  19. data/ext/nmatrix/generator/syntax_tree.rb +481 -0
  20. data/ext/nmatrix/list.c +774 -0
  21. data/ext/nmatrix/nmatrix.c +1977 -0
  22. data/ext/nmatrix/nmatrix.h +912 -0
  23. data/ext/nmatrix/rational.c +98 -0
  24. data/ext/nmatrix/yale.c +726 -0
  25. data/ext/nmatrix/yale/complexmath.template.c +71 -0
  26. data/ext/nmatrix/yale/elementwise.template.c +46 -0
  27. data/ext/nmatrix/yale/elementwise_op.template.c +73 -0
  28. data/ext/nmatrix/yale/numbmm.template.c +94 -0
  29. data/ext/nmatrix/yale/smmp1.template.c +21 -0
  30. data/ext/nmatrix/yale/smmp1_header.template.c +38 -0
  31. data/ext/nmatrix/yale/smmp2.template.c +43 -0
  32. data/ext/nmatrix/yale/smmp2_header.template.c +46 -0
  33. data/ext/nmatrix/yale/sort_columns.template.c +56 -0
  34. data/ext/nmatrix/yale/symbmm.template.c +54 -0
  35. data/ext/nmatrix/yale/transp.template.c +68 -0
  36. data/lib/array.rb +67 -0
  37. data/lib/nmatrix.rb +263 -0
  38. data/lib/string.rb +65 -0
  39. data/spec/nmatrix_spec.rb +395 -0
  40. data/spec/nmatrix_yale_spec.rb +239 -0
  41. data/spec/nvector_spec.rb +43 -0
  42. data/spec/syntax_tree_spec.rb +46 -0
  43. metadata +150 -0
@@ -0,0 +1,98 @@
1
+ /////////////////////////////////////////////////////////////////////
2
+ // = NMatrix
3
+ //
4
+ // A linear algebra library for scientific computation in Ruby.
5
+ // NMatrix is part of SciRuby.
6
+ //
7
+ // NMatrix was originally inspired by and derived from NArray, by
8
+ // Masahiro Tanaka: http://narray.rubyforge.org
9
+ //
10
+ // == Copyright Information
11
+ //
12
+ // SciRuby is Copyright (c) 2010 - 2012, Ruby Science Foundation
13
+ // NMatrix is Copyright (c) 2012, Ruby Science Foundation
14
+ //
15
+ // Please see LICENSE.txt for additional copyright notices.
16
+ //
17
+ // == Contributing
18
+ //
19
+ // By contributing source code to SciRuby, you agree to be bound by
20
+ // our Contributor Agreement:
21
+ //
22
+ // * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement
23
+ //
24
+ // == rational.c
25
+ //
26
+ // This file is largely based off of Ruby 1.9.3's rational.c. It
27
+ // contains functions for dealing with rational types which are
28
+ // not Ruby VALUE-based.
29
+
30
+ #ifndef RATIONAL_C
31
+ # define RATIONAL_C
32
+
33
+ #include "nmatrix.h"
34
+
35
+
36
// Greatest common divisor of two signed 64-bit integers, via the Euclidean
// algorithm. The result is always non-negative: gcd(0, y) == |y|,
// gcd(x, 0) == |x|, gcd(0, 0) == 0.
//
// FIX: dropped the bare `inline` specifier. Under C99/C11 inline semantics
// (C11 6.7.4p7) a plain `inline` definition does not provide an external
// definition of the function, so any call the compiler chooses not to
// inline (e.g. at -O0, or from another translation unit) fails to link.
// A plain definition always emits the symbol; the optimizer is still free
// to inline calls within this translation unit.
int64_t nmrb_gcd(int64_t x, int64_t y) {
  int64_t t;

  // Work with magnitudes so the result is non-negative.
  if (x < 0) x = -x;
  if (y < 0) y = -y;

  if (x == 0) return y;
  if (y == 0) return x;

  while (x > 0) {
    t = x;
    x = y % x;
    y = t;
  }

  return y;
}
53
+
54
+ /*
55
+ static double f_gcd(double x, double y) {
56
+ double z;
57
+
58
+ if (x < 0.0) x = -x;
59
+ if (y < 0.0) y = -y;
60
+ if (x == 0.0) return y;
61
+ if (y == 0.0) return x;
62
+
63
+ for (;;) {
64
+ z = x;
65
+ x = y % x;
66
+ y = z;
67
+ }
68
+ // NOTREACHED
69
+ }*/
70
+
71
+ /*
72
+ inline VALUE nmrb_rational_new_no_reduce1(VALUE klass, int64_t x) {
73
+ return nurat_s_canonicalize_internal_no_reduce(klass, x, 1);
74
+ }
75
+
76
+ inline static VALUE nurat_s_canonicalize_internal_no_reduce(VALUE klass, int64_t num, int64_t den) {
77
+ if (den < 0) {
78
+ num = -num;
79
+ den = -den;
80
+ } else if (den == 0) {
81
+ rb_raise_zerodiv();
82
+ }
83
+ return nurat_s_new_internal(klass, num, den);
84
+ }*/
85
+ /*
86
+ inline VALUE nmrb_rational_new(VALUE klass, int64_t num, int64_t den) {
87
+ NEWOBJ(obj, struct RRational);
88
+ OBJSETUP(obj, klass, T_RATIONAL);
89
+
90
+ obj->num = INT2FIX(num);
91
+ obj->den = INT2FIX(den);
92
+
93
+ return (VALUE)obj;
94
+ }
95
+ */
96
+
97
+
98
+ #endif
@@ -0,0 +1,726 @@
1
+ /////////////////////////////////////////////////////////////////////
2
+ // = NMatrix
3
+ //
4
+ // A linear algebra library for scientific computation in Ruby.
5
+ // NMatrix is part of SciRuby.
6
+ //
7
+ // NMatrix was originally inspired by and derived from NArray, by
8
+ // Masahiro Tanaka: http://narray.rubyforge.org
9
+ //
10
+ // == Copyright Information
11
+ //
12
+ // SciRuby is Copyright (c) 2010 - 2012, Ruby Science Foundation
13
+ // NMatrix is Copyright (c) 2012, Ruby Science Foundation
14
+ //
15
+ // Please see LICENSE.txt for additional copyright notices.
16
+ //
17
+ // == Contributing
18
+ //
19
+ // By contributing source code to SciRuby, you agree to be bound by
20
+ // our Contributor Agreement:
21
+ //
22
+ // * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement
23
+ //
24
+ // == yale.c
25
+ //
26
+ // "new yale" storage format for 2D matrices (like yale, but with
27
+ // the diagonal pulled out for O(1) access).
28
+ //
29
+ // Specifications:
30
+ // * dtype and index dtype must necessarily differ
31
+ // * index dtype is defined by whatever unsigned type can store
32
+ // max(rows,cols)
33
+ // * that means vector ija stores only index dtype, but a stores
34
+ // dtype
35
+ // * vectors must be able to grow as necessary
36
+ // * maximum size is rows*cols+1
37
+
38
+ #ifndef YALE_C
39
+ # define YALE_C
40
+
41
+ #include <ruby.h> // mostly for exceptions
42
+
43
+ #include "nmatrix.h"
44
+
45
+ extern VALUE nm_eStorageTypeError;
46
+
47
+
48
+ extern const char *nm_dtypestring[];
49
+
50
+
51
// Debug helper: dump a yale storage's header, IJA vector, and A vector to
// stderr. Raises ArgError for capacity > 60 to keep the dump readable.
// NOTE(review): the A vector is printed as float64 regardless of s->dtype,
// so the "a:" line is garbage for any other dtype — confirm debug-only use.
void print_vectors(YALE_STORAGE* s) {
  size_t i;
  fprintf(stderr, "------------------------------\n");
  fprintf(stderr, "dtype:%s\tshape:%dx%d\tndnz:%d\tcapacity:%d\tindex_dtype:%s\n", nm_dtypestring[s->dtype], s->shape[0], s->shape[1], s->ndnz, s->capacity, nm_dtypestring[s->index_dtype]);

  if (s->capacity > 60) rb_raise(rb_eArgError, "overflow in print_vectors; cannot handle that large of a vector");

  // print indices
  fprintf(stderr, "i:\t");
  for (i = 0; i < s->capacity; ++i) fprintf(stderr, "%-5lu ", (unsigned long)i);

  // Print the IJA vector, dereferencing each entry at the width implied by
  // the matrix's maximum size (which determines index_dtype).
  fprintf(stderr, "\nija:\t");
  if (YALE_MAX_SIZE(s) < UINT8_MAX)
    for (i = 0; i < s->capacity; ++i) fprintf(stderr, "%-5u ", *(u_int8_t*)YALE_IJA(s,nm_sizeof[s->index_dtype],i));
  else if (YALE_MAX_SIZE(s) < UINT16_MAX)
    for (i = 0; i < s->capacity; ++i) fprintf(stderr, "%-5u ", *(u_int16_t*)YALE_IJA(s,nm_sizeof[s->index_dtype],i));
  else if (YALE_MAX_SIZE(s) < UINT32_MAX)
    for (i = 0; i < s->capacity; ++i) fprintf(stderr, "%-5u ", *(u_int32_t*)YALE_IJA(s,nm_sizeof[s->index_dtype],i));
  else
    for (i = 0; i < s->capacity; ++i) fprintf(stderr, "%-5llu ", *(u_int64_t*)YALE_IJA(s,nm_sizeof[s->index_dtype],i));
  fprintf(stderr, "\n");

  // print values (assumes NM_FLOAT64 — see NOTE above)
  fprintf(stderr, "a:\t");
  for (i = 0; i < s->capacity; ++i)
    fprintf(stderr, "%-*.3g ", 5, *(double*)((char*)(s->a) + nm_sizeof[NM_FLOAT64]*i));
  fprintf(stderr, "\n");

  fprintf(stderr, "------------------------------\n");
}
82
+
83
+
84
+ // Determine the index dtype (which will be used for the ija vector). This is determined by matrix shape, not IJA/A vector capacity.
85
+ // Note that it's MAX-2 because UINTX_MAX and UINTX_MAX-1 are both reserved for sparse matrix multiplication.
86
// Determine the index dtype (which will be used for the ija vector). This is
// determined by matrix shape (rows*cols), not by IJA/A vector capacity.
// Note that it's MAX-2 because UINTX_MAX and UINTX_MAX-1 are both reserved
// for sparse matrix multiplication (SMMP sentinels).
int8_t yale_index_dtype(YALE_STORAGE* s) {
  if (YALE_MAX_SIZE(s) < UINT8_MAX-2) return NM_INT8;
  else if (YALE_MAX_SIZE(s) < UINT16_MAX-2) return NM_INT16;
  else if (YALE_MAX_SIZE(s) < UINT32_MAX-2) return NM_INT32;
  else if (YALE_MAX_SIZE(s) >= UINT64_MAX-2)
    // Matrix too large even for 64-bit indices: warn but still fall through
    // to NM_INT64 below.
    // TODO: Turn this into an exception somewhere else. It's pretty unlikely, but who knows.
    fprintf(stderr, "WARNING: Matrix can contain no more than %llu non-diagonal non-zero entries, or results may be unpredictable\n", UINT64_MAX - SMMP_MIN(s->shape[0],s->shape[1]) - 2);
  return NM_INT64;
}
95
+
96
+
97
+ // Is the non-diagonal portion of the row empty?
98
+ static bool ndrow_is_empty(const YALE_STORAGE* s, y_size_t ija, const y_size_t ija_next, const void* ZERO) {
99
+ //fprintf(stderr, "ndrow_is_empty: ija=%d, ija_next=%d\n", (size_t)(ija), (size_t)(ija_next));
100
+ if (ija == ija_next) return true;
101
+ while (ija < ija_next) { // do all the entries = zero?
102
+ if (memcmp((char*)s->a + nm_sizeof[s->dtype]*ija, ZERO, nm_sizeof[s->dtype])) return false;
103
+ ++ija;
104
+ }
105
+ return true;
106
+ }
107
+
108
+
109
+ // Are two non-diagonal rows the same? We already know
110
+ static bool ndrow_eqeq_ndrow(const YALE_STORAGE* l, const YALE_STORAGE* r, y_size_t l_ija, const y_size_t l_ija_next, y_size_t r_ija, const y_size_t r_ija_next, const void* ZERO) {
111
+ y_size_t l_ja, r_ja, ja;
112
+ bool l_no_more = false, r_no_more = false;
113
+
114
+ YaleGetIJA(l_ja, l, l_ija);
115
+ YaleGetIJA(r_ja, r, r_ija);
116
+ ja = SMMP_MIN(l_ja, r_ja);
117
+
118
+ //fprintf(stderr, "ndrow_eqeq_ndrow\n");
119
+ while (!(l_no_more && r_no_more)) {
120
+ //fprintf(stderr, "ndrow_eqeq_ndrow(loop): l_ija=%d, l_ija_next=%d, r_ija=%d, r_ija_next=%d\n", (size_t)(l_ija), (size_t)(l_ija_next), (size_t)(r_ija), (size_t)(r_ija_next));
121
+ if (l_ja == r_ja) {
122
+ if (memcmp((char*)r->a + nm_sizeof[r->dtype]*r_ija, (char*)l->a + nm_sizeof[l->dtype]*l_ija, nm_sizeof[l->dtype])) return false;
123
+
124
+ ++l_ija;
125
+ ++r_ija;
126
+
127
+ if (l_ija < l_ija_next) YaleGetIJA(l_ja, l, l_ija);
128
+ else l_no_more = true;
129
+
130
+ if (r_ija < r_ija_next) YaleGetIJA(r_ja, r, r_ija);
131
+ else r_no_more = true;
132
+
133
+ ja = SMMP_MIN(l_ja, r_ja);
134
+ } else if (l_no_more || ja < l_ja) {
135
+ if (memcmp((char*)r->a + nm_sizeof[r->dtype]*r_ija, ZERO, nm_sizeof[r->dtype])) return false;
136
+
137
+ ++r_ija;
138
+ if (r_ija < r_ija_next) {
139
+ YaleGetIJA(r_ja, r, r_ija); // get next column
140
+ ja = SMMP_MIN(l_ja, r_ja);
141
+ } else l_no_more = true;
142
+
143
+ } else if (r_no_more || ja < r_ja) {
144
+ if (memcmp((char*)l->a + nm_sizeof[l->dtype]*l_ija, ZERO, nm_sizeof[l->dtype])) return false;
145
+
146
+ ++l_ija;
147
+ if (l_ija < l_ija_next) {
148
+ YaleGetIJA(l_ja, l, l_ija); // get next column
149
+ ja = SMMP_MIN(l_ja, r_ja);
150
+ } else l_no_more = true;
151
+
152
+ } else {
153
+ fprintf(stderr, "Unhandled in eqeq: l_ja=%d, r_ja=%d\n", l_ja, r_ja);
154
+ }
155
+ }
156
+
157
+ return true; // every item matched
158
+ }
159
+
160
+
161
// Deep equality for two yale storages. Strategy: compare the diagonals with
// one memcmp, then walk the rows; two rows match when both are effectively
// empty (no entries, or all stored entries zero) or their non-diagonal
// entries compare equal via ndrow_eqeq_ndrow.
// NOTE(review): assumes left and right share dtype and shape — the memcmps
// use left's sizes for both sides; confirm callers guarantee this. For
// NM_ROBJ the diagonal memcmp compares VALUE identity, not Ruby #==.
bool yale_storage_eqeq(const YALE_STORAGE* left, const YALE_STORAGE* right) {
  y_size_t l_ija, l_ija_next, r_ija, r_ija_next;
  y_size_t i = 0;

  // Need to know zero (dtype-appropriate representation, stack-allocated).
  void* ZERO = alloca(nm_sizeof[left->dtype]);
  if (left->dtype == NM_ROBJ) *(VALUE*)ZERO = INT2FIX(0);
  else memset(ZERO, 0, nm_sizeof[left->dtype]);

  // Compare the diagonals first.
  if (memcmp(left->a, right->a, nm_sizeof[left->dtype] * left->shape[0])) return false;

  while (i < left->shape[0]) {
    // Get start and end positions of row
    YaleGetIJA(l_ija, left, i);
    YaleGetIJA(l_ija_next, left, i+1);
    YaleGetIJA(r_ija, right, i);
    YaleGetIJA(r_ija_next, right, i+1);

    // Check to see if one row is empty and the other isn't.
    if (ndrow_is_empty(left, l_ija, l_ija_next, ZERO)) {
      if (!ndrow_is_empty(right, r_ija, r_ija_next, ZERO)) return false;

    } else if (ndrow_is_empty(right, r_ija, r_ija_next, ZERO)) { // one is empty but the other isn't
      return false;
    } else if (!ndrow_eqeq_ndrow(left, right, l_ija, l_ija_next, r_ija, r_ija_next, ZERO)) { // Neither row is empty. Must compare the rows directly.
      return false;
    }

    ++i;
  }

  return true;
}
201
+
202
+
203
+ // Just like yale_vector_replace, but doesn't replace the contents of the cell, only the column index.
204
// Just like yale_vector_replace, but doesn't replace the contents of the
// cell, only the column index: writes the y_size_t at *j into the IJA vector
// at position pos, converted to the storage's index dtype.
// Returns 'r' ("replaced"; no structural change).
static char yale_vector_replace_j(YALE_STORAGE* s, y_size_t pos, y_size_t* j) {
  SetFuncs[s->index_dtype][Y_SIZE_T](1,
                                     pos*nm_sizeof[s->index_dtype] + (char*)(s->ija),
                                     nm_sizeof[s->index_dtype],
                                     j,
                                     sizeof(y_size_t));

  return 'r';
}
213
+
214
+
215
// Overwrite n consecutive IJA/A entries starting at position pos: the column
// indices j[0..n) go into IJA (converted to the index dtype) and the n
// values at val go into A (dtype-to-dtype copy). Nothing is shifted; this
// replaces in place. Returns 'r' ("replaced").
char yale_vector_replace(YALE_STORAGE* s, y_size_t pos, y_size_t* j, void* val, y_size_t n) {

  // Now insert the new values
  SetFuncs[s->index_dtype][Y_SIZE_T](n,
                                     pos*nm_sizeof[s->index_dtype] + (char*)(s->ija),
                                     nm_sizeof[s->index_dtype],
                                     j,
                                     sizeof(y_size_t));
  SetFuncs[s->dtype][s->dtype](n,
                               pos*nm_sizeof[s->dtype] + (char*)(s->a),
                               nm_sizeof[s->dtype],
                               val,
                               nm_sizeof[s->dtype]);

  return 'r';
}
231
+
232
+
233
// Grow the IJA and A vectors (by YALE_GROWTH_CONSTANT, capped at
// YALE_MAX_SIZE) and copy the old contents across, leaving an n-entry gap at
// position pos for a pending insertion. When struct_only, A's contents are
// not copied (only the structure matters to the caller). Raises NoMemError
// if the insertion would exceed the maximum yale size. Returns 'i'.
//
// NOTE(review): ALLOC_N is Ruby's xmalloc-based allocator — it raises on
// OOM and never returns NULL, so the !new_ija/!new_a check below appears to
// be dead code; also the old buffers are released with free() where xfree()
// would match ALLOC_N. Confirm against the Ruby C API in use.
// NOTE(review): the tail copy moves current_size-pos+n-1 elements, but only
// current_size-pos elements exist after pos — verify this count against the
// callers (possible over-read of the old buffers).
char yale_vector_insert_resize(YALE_STORAGE* s, y_size_t current_size, y_size_t pos, y_size_t* j, void* val, y_size_t n, bool struct_only) {
  void *new_ija, *new_a;
  // Determine the new capacity for the IJA and A vectors.
  size_t new_capacity = s->capacity * YALE_GROWTH_CONSTANT;
  if (new_capacity > YALE_MAX_SIZE(s)) {
    new_capacity = YALE_MAX_SIZE(s);
    if (current_size + n > YALE_MAX_SIZE(s)) rb_raise(rb_eNoMemError, "insertion size exceeded maximum yale matrix size");
  }
  if (new_capacity < current_size + n) new_capacity = current_size + n;

  // Allocate the new vectors.
  new_ija = ALLOC_N( char, nm_sizeof[s->index_dtype] * new_capacity );
  new_a = ALLOC_N( char, nm_sizeof[s->dtype] * new_capacity );

  // Check that allocation succeeded. (See NOTE above: likely unreachable.)
  if (!new_ija || !new_a) {
    free(new_a); free(new_ija);
    rb_raise(rb_eNoMemError, "yale sparse vectors are full and there is insufficient memory for growing them");
    return (char)false;
  }

  // Copy all values prior to the insertion site to the new IJA and new A
  SetFuncs[s->index_dtype][s->index_dtype](pos, new_ija, nm_sizeof[s->index_dtype], s->ija, nm_sizeof[s->index_dtype]);
  if (!struct_only)
    SetFuncs[s->dtype      ][s->dtype      ](pos, new_a,   nm_sizeof[s->dtype],       s->a,   nm_sizeof[s->dtype]      );

  // Copy all values subsequent to the insertion site to the new IJA and new A, leaving room (size n) for insertion.
  SetFuncs[s->index_dtype][s->index_dtype](current_size-pos+n-1, (char*)new_ija + nm_sizeof[s->index_dtype]*(pos+n), nm_sizeof[s->index_dtype], (char*)(s->ija) + nm_sizeof[s->index_dtype]*pos, nm_sizeof[s->index_dtype]);
  if (!struct_only)
    SetFuncs[s->dtype      ][s->dtype      ](current_size-pos+n-1, (char*)new_a + nm_sizeof[s->dtype]*(pos+n),        nm_sizeof[s->dtype],       (char*)(s->a) + nm_sizeof[s->dtype ]*pos,       nm_sizeof[s->dtype ]);

  s->capacity = new_capacity;

  // Release the old buffers and install the new ones.
  free(s->ija);
  free(s->a);

  s->ija = new_ija;
  s->a = new_a;

  return 'i';
}
274
+
275
+
276
+ // Insert a value or contiguous values in the ija and a vectors (after ja and diag). Does not free anything; you are responsible!
277
+ //
278
+ // TODO: Improve this so it can handle non-contiguous element insertions efficiently.
279
+ // (For now, we can just sort the elements in the row in question.)
280
// Insert a value or contiguous values in the ija and a vectors (after ja and
// diag). Does not free anything; you are responsible!
// j[0..n) are column indices and val their values (values ignored when
// struct_only). pos is the absolute IJA/A position at which to insert;
// entries from pos onward shift right by n. Returns 'i' ("inserted").
//
// TODO: Improve this so it can handle non-contiguous element insertions
// efficiently. (For now, we can just sort the elements in the row in question.)
char yale_vector_insert(YALE_STORAGE* s, y_size_t pos, y_size_t* j, void* val, y_size_t n, bool struct_only) {
  y_size_t sz, i;

  // ja entries live after the IA region; a pos inside IA is a logic error.
  if (pos < s->shape[0])
    rb_raise(rb_eArgError, "vector insert pos is before beginning of ja; this should not happen");

  YaleGetSize(sz, s);

  // When over capacity, the resize leaves the n-entry gap at pos for us.
  if (sz + n > s->capacity) yale_vector_insert_resize(s, sz, pos, j, val, n, struct_only);
  else {

    // No resize required:
    // easy (but somewhat slow), just copy elements to the tail, starting at
    // the end, one element at a time (so nothing is clobbered).
    // TODO: This can be made slightly more efficient, but only after the tests are written.
    for (i = 0; i < sz - pos; ++i) {
      SetFuncs[s->index_dtype][s->index_dtype](1, (char*)(s->ija) + (sz+n-1-i)*nm_sizeof[s->index_dtype], 0, (char*)(s->ija) + (sz-1-i)*nm_sizeof[s->index_dtype], 0);

      if (!struct_only)
        SetFuncs[s->dtype      ][s->dtype      ](1, (char*)(s->a) + (sz+n-1-i)*nm_sizeof[s->dtype      ], 0, (char*)(s->a) + (sz-1-i)*nm_sizeof[s->dtype      ], 0);
    }
  }

  // Now insert the new values.
  if (struct_only) yale_vector_replace_j(s, pos, j);
  else yale_vector_replace(s, pos, j, val, n);

  return 'i';
}
308
+
309
+
310
+ void delete_yale_storage(YALE_STORAGE* s) {
311
+ if (s) {
312
+ free(s->shape);
313
+ free(s->ija);
314
+ free(s->a);
315
+ free(s);
316
+ }
317
+ }
318
+
319
+
320
+ void mark_yale_storage(void* m) {
321
+ size_t i;
322
+ YALE_STORAGE* storage;
323
+
324
+ if (m) {
325
+ storage = (YALE_STORAGE*)(((NMATRIX*)m)->storage);
326
+ fprintf(stderr, "mark_yale_storage\n");
327
+ if (storage && storage->dtype == NM_ROBJ)
328
+ for (i = 0; i < storage->capacity; ++i)
329
+ rb_gc_mark(*((VALUE*)((char*)(storage->a) + i*nm_sizeof[NM_ROBJ])));
330
+ }
331
+ }
332
+
333
+
334
+ // Allocate for a copy or copy-cast operation, and copy the IJA portion of the matrix (the structure).
335
// Allocate for a copy or copy-cast operation, and copy the IJA portion of
// the matrix (the structure). The A vector is allocated at new_capacity but
// its contents are left for the caller to fill (see copy_yale_storage /
// cast_copy_yale_storage). Returns a freshly allocated storage the caller owns.
//
// NOTE(review): the new_size parameter is overwritten below by
// YaleGetSize(new_size, rhs) — the macro assigns into it — so the value the
// caller passes in is ignored (and the const qualifier is circumvented).
// Confirm whether the parameter is meant to be an output or should be dropped.
static YALE_STORAGE* copy_alloc_yale_storage_struct(const YALE_STORAGE* rhs, const int8_t new_dtype, const y_size_t new_capacity, const y_size_t new_size) {
  YALE_STORAGE* lhs = ALLOC( YALE_STORAGE );
  lhs->rank = rhs->rank;
  lhs->shape = ALLOC_N( size_t, lhs->rank );
  memcpy(lhs->shape, rhs->shape, lhs->rank * sizeof(size_t));
  lhs->index_dtype = rhs->index_dtype;
  lhs->capacity = new_capacity;
  lhs->dtype = new_dtype;
  lhs->ndnz = rhs->ndnz;

  lhs->ija = ALLOC_N( char, nm_sizeof[lhs->index_dtype] * lhs->capacity );
  lhs->a = ALLOC_N( char, nm_sizeof[lhs->dtype] * lhs->capacity );

  // Now copy the contents -- but only within the boundaries set by the size. Leave
  // the rest uninitialized.
  YaleGetSize(new_size, rhs);
  memcpy(lhs->ija, rhs->ija, new_size * nm_sizeof[lhs->index_dtype]); // indices

  return lhs;
}
355
+
356
+
357
+ // copy constructor
358
+ YALE_STORAGE* copy_yale_storage(YALE_STORAGE* rhs) {
359
+ y_size_t size;
360
+ YALE_STORAGE* lhs;
361
+
362
+ YaleGetSize(size, rhs);
363
+ lhs = copy_alloc_yale_storage_struct(rhs, rhs->dtype, rhs->capacity, size);
364
+
365
+ // Now copy the contents -- but only within the boundaries set by the size. Leave
366
+ // the rest uninitialized.
367
+ memcpy(lhs->a, rhs->a, size * nm_sizeof[lhs->dtype]);
368
+
369
+ return lhs;
370
+ }
371
+
372
+
373
+
374
+ // copy constructor
375
+ YALE_STORAGE* cast_copy_yale_storage(YALE_STORAGE* rhs, int8_t new_dtype) {
376
+ y_size_t size;
377
+ YALE_STORAGE* lhs;
378
+
379
+ YaleGetSize(size, rhs);
380
+ lhs = copy_alloc_yale_storage_struct(rhs, new_dtype, rhs->capacity, size);
381
+
382
+ // Copy contents (not structure)
383
+ if (lhs->dtype == rhs->dtype) memcpy(lhs->a, rhs->a, size * nm_sizeof[lhs->dtype]);
384
+ else SetFuncs[new_dtype][rhs->dtype](size, lhs->a, nm_sizeof[lhs->dtype], rhs->a, nm_sizeof[rhs->dtype]);
385
+
386
+ return lhs;
387
+ }
388
+
389
+
390
+ // Clear out the D portion of the A vector (clearing the diagonal and setting the zero value).
391
+ //
392
+ // Note: This sets a literal 0 value. If your dtype is NM_ROBJ (a Ruby object), it'll actually
393
+ // be INT2FIX(0) instead of a string of NULLs.
394
+ static void clear_diagonal_and_zero(YALE_STORAGE* s) {
395
+ y_size_t i;
396
+ // Clear out the diagonal + one extra entry
397
+ if (s->dtype == NM_ROBJ) {
398
+ for (i = 0; i < YALE_IA_SIZE(s)+1; ++i) // insert Ruby zeros
399
+ *(VALUE*)( (char*)(s->a) + i*nm_sizeof[s->dtype] ) = INT2FIX(0);
400
+ } else { // just insert regular zeros
401
+ memset(s->a, 0, nm_sizeof[s->dtype] * (YALE_IA_SIZE(s)+1));
402
+ }
403
+ }
404
+
405
+
406
+
407
// Convert a rank-2 list storage (with default value 0) to yale storage,
// casting values to l_dtype. Caller owns the returned storage.
//
// NOTE(review): the default-value guard below looks inverted for NM_ROBJ —
// it raises when the default IS INT2FIX(0) (`==` where `!=` seems intended),
// and the strncmp fallback also inspects the raw VALUE bytes of an ROBJ
// default. Verify against the callers and the intended contract.
// NOTE(review): the final YaleSetIJA writes the end-of-row marker at
// position i_curr->key (the last row's index, overwriting that row's start
// entry) rather than at shape[0]; rows with no list node never get an IJA
// entry at all. Compare with scast_copy_yale_dense's end handling; confirm
// the IJA vector is pre-initialized elsewhere.
YALE_STORAGE* scast_copy_yale_list(const LIST_STORAGE* rhs, int8_t l_dtype) {
  YALE_STORAGE* lhs;
  size_t* shape;
  NODE *i_curr, *j_curr;
  y_size_t ija;
  size_t ndnz = count_list_storage_nd_elements(rhs);

  if (rhs->rank != 2)
    rb_raise(nm_eStorageTypeError, "can only convert matrices of rank 2 to yale");

  // Require a zero default value (see NOTE above re: the ROBJ comparison).
  if ((rhs->dtype == NM_ROBJ && *(VALUE*)(rhs->default_val) == INT2FIX(0)) || strncmp(rhs->default_val, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0", nm_sizeof[rhs->dtype]))
    rb_raise(nm_eStorageTypeError, "list matrix must have default value of 0 to convert to yale");

  // Copy shape for yale construction
  shape = ALLOC_N(size_t, 2);
  shape[0] = rhs->shape[0];
  shape[1] = rhs->shape[1];

  // Capacity: IA region + one slot per non-diagonal nonzero + zero slot.
  lhs = create_yale_storage(l_dtype, shape, 2, shape[0] + ndnz + 1);
  clear_diagonal_and_zero(lhs); // clear the diagonal and the zero location.

  ija = lhs->shape[0]+1; // first position after the IA region

  for (i_curr = rhs->rows->first; i_curr; i_curr = i_curr->next) {

    // indicate the beginning of a row in the IJA array
    YaleSetIJA(i_curr->key, lhs, ija);

    for (j_curr = ((LIST*)(i_curr->val))->first; j_curr; j_curr = j_curr->next) {
      if (i_curr->key == j_curr->key) {
        // set diagonal
        SetFuncs[l_dtype][rhs->dtype](1, (char*)(lhs->a) + (i_curr->key)*nm_sizeof[l_dtype], 0, j_curr->val, 0);
      } else {
        // set column value
        YaleSetIJA(ija, lhs, j_curr->key); // write column index

        // set cell value
        SetFuncs[l_dtype][rhs->dtype](1, (char*)(lhs->a) + ija*nm_sizeof[l_dtype], 0, j_curr->val, 0);

        ++ija;
      }
    }

    if (!i_curr->next) YaleSetIJA(i_curr->key, lhs, ija); // indicate the end of the last row (see NOTE above)
  }

  lhs->ndnz = ndnz;
  return lhs;
}
456
+
457
+
458
// Convert a rank-2 dense storage to yale storage, casting values to l_dtype.
// Two passes over the dense elements: the first counts non-diagonal nonzeros
// so the yale storage can be created at minimum capacity, the second copies
// the diagonal and nonzero entries. Caller owns the returned storage.
YALE_STORAGE* scast_copy_yale_dense(const DENSE_STORAGE* rhs, int8_t l_dtype) {
  YALE_STORAGE* lhs;
  size_t i, j;
  y_size_t pos = 0, ndnz = 0, ija;
  size_t* shape;

  // Figure out values to write for zero in yale and compare to zero in dense
  void *R_ZERO = ALLOCA_N(char, nm_sizeof[rhs->dtype]);
  if (rhs->dtype == NM_ROBJ) *(VALUE*)R_ZERO = INT2FIX(0);
  else memset(R_ZERO, 0, nm_sizeof[rhs->dtype]);

  if (rhs->rank != 2)
    rb_raise(nm_eStorageTypeError, "can only convert matrices of rank 2 to yale");

  // First, count the non-diagonal nonzeros
  for (i = 0; i < rhs->shape[0]; ++i) {
    for (j = 0; j < rhs->shape[1]; ++j) {
      if (i != j && memcmp((char*)(rhs->elements) + pos*nm_sizeof[rhs->dtype], R_ZERO, nm_sizeof[rhs->dtype])) ++ndnz;
      ++pos; // move forward 1 position in dense matrix elements array
    }
  }

  // Copy shape for yale construction
  shape = ALLOC_N(size_t, 2);
  shape[0] = rhs->shape[0];
  shape[1] = rhs->shape[1];

  // Create with minimum possible capacity -- just enough to hold all of the entries
  lhs = create_yale_storage(l_dtype, shape, 2, shape[0] + ndnz + 1);

  // Set the zero position in the yale matrix
  cast_copy_value_single((char*)(lhs->a) + shape[0]*nm_sizeof[l_dtype], R_ZERO, l_dtype, rhs->dtype);

  // Start just after the zero position.
  ija = lhs->shape[0]+1;
  pos = 0;

  // Copy contents
  for (i = 0; i < rhs->shape[0]; ++i) {
    // indicate the beginning of a row in the IJA array
    YaleSetIJA(i, lhs, ija);

    for (j = 0; j < rhs->shape[1]; ++j) {

      if (i == j) { // copy to diagonal
        cast_copy_value_single((char*)(lhs->a) + i*nm_sizeof[l_dtype], (char*)(rhs->elements) + pos*nm_sizeof[rhs->dtype], l_dtype, rhs->dtype);

      } else if (memcmp((char*)(rhs->elements) + pos*nm_sizeof[rhs->dtype], R_ZERO, nm_sizeof[rhs->dtype])) { // copy nonzero to LU
        YaleSetIJA(ija, lhs, j); // write column index

        cast_copy_value_single((char*)(lhs->a) + ija*nm_sizeof[l_dtype], (char*)(rhs->elements) + pos*nm_sizeof[rhs->dtype], l_dtype, rhs->dtype);

        ++ija;
      }
      ++pos;
    }
  }
  // i == shape[0] here: write the final end-of-rows marker.
  YaleSetIJA(i, lhs, ija); // indicate the end of the last row

  lhs->ndnz = ndnz;

  return lhs;
}
521
+
522
+
523
+ // If we add n items to row i, we need to increment ija[i+1] and onward
524
+ // TODO: Add function pointer array for AddFuncs, the same way we have SetFuncs
525
+ static void yale_storage_increment_ia_after(YALE_STORAGE* s, y_size_t ija_size, y_size_t i, y_size_t n) {
526
+ y_size_t val;
527
+
528
+ ++i;
529
+ for (; i <= ija_size; ++i) {
530
+ YaleGetIJA(val, s, i);
531
+ val += n;
532
+ YaleSetIJA(i, s, val);
533
+ }
534
+ }
535
+
536
+
537
+ // Binary search for returning insertion points
538
+ static y_size_t yale_storage_insert_search(YALE_STORAGE* s, y_size_t left, y_size_t right, y_size_t key, bool* found) {
539
+ y_size_t mid = (left + right)/2, mid_j;
540
+
541
+ if (left > right) {
542
+ *found = false;
543
+ return left;
544
+ }
545
+
546
+ YaleGetIJA(mid_j, s, mid);
547
+ if (mid_j == key) {
548
+ *found = true;
549
+ return mid;
550
+ }
551
+ else if (mid_j > key) return yale_storage_insert_search(s, left, mid-1, key, found);
552
+ else return yale_storage_insert_search(s, mid+1, right, key, found);
553
+ }
554
+
555
+
556
+
557
+ YALE_STORAGE* create_merged_yale_storage(const YALE_STORAGE* template, const YALE_STORAGE* other) {
558
+ y_size_t ija, ija_next, o_ija, o_ija_next;
559
+ y_size_t ja, o_ja, size, pos;
560
+ YALE_STORAGE* s;
561
+ bool found;
562
+ char ins_type;
563
+ size_t i;
564
+
565
+ YaleGetSize(size, template);
566
+ s = copy_alloc_yale_storage_struct(template, template->dtype, SMMP_MAX(template->capacity, other->capacity), size);
567
+
568
+ // set the element between D and LU (the boundary in A), which should be 0.
569
+ memcpy((char*)(s->a) + nm_sizeof[s->dtype] * s->shape[0], (char*)(template->a) + nm_sizeof[template->dtype] * template->shape[0], nm_sizeof[s->dtype]);
570
+
571
+ if (other && other != template) { // some operations are unary and don't need this; others are x+x and don't need this
572
+ for (i = 0; i < s->shape[0]; ++i) {
573
+ YaleGetIJA(ija, s, i);
574
+ YaleGetIJA(ija_next, s, i+1);
575
+
576
+ YaleGetIJA(o_ija, other, i);
577
+ YaleGetIJA(o_ija_next, other, i+1);
578
+
579
+ while (o_ija < o_ija_next) {
580
+ YaleGetIJA(o_ja, other, o_ija);
581
+ YaleGetIJA(ja, s, ija);
582
+
583
+ if (ija == ija_next) { // destination row is empty
584
+ ins_type = yale_vector_insert(s, ija, &ja, NULL, 1, true);
585
+ yale_storage_increment_ia_after(s, YALE_IA_SIZE(s), i, 1);
586
+ ++(s->ndnz);
587
+ ++ija;
588
+ if (ins_type == 'i') ++ija_next;
589
+ } else { // merge positions into destination row
590
+ pos = yale_storage_insert_search(s, ija, ija_next-1, ja, &found);
591
+ if (!found) {
592
+ yale_vector_insert(s, pos, &ja, NULL, 1, true);
593
+ yale_storage_increment_ia_after(s, YALE_IA_SIZE(s), i, 1);
594
+ ++(s->ndnz);
595
+ if (ins_type == 'i') ++ija_next;
596
+ }
597
+ ija = pos+1; // can now set a left boundary for the next search
598
+ }
599
+
600
+ ++o_ija;
601
+ }
602
+ }
603
+ }
604
+
605
+ return s;
606
+ }
607
+
608
+
609
+
610
+ YALE_STORAGE* create_yale_storage(int8_t dtype, size_t* shape, size_t rank, size_t init_capacity) {
611
+ YALE_STORAGE* s;
612
+
613
+ if (rank != 2) rb_raise(rb_eNotImpError, "Can only support 2D matrices");
614
+
615
+ s = ALLOC( YALE_STORAGE );
616
+
617
+ s->ndnz = 0;
618
+ s->dtype = dtype;
619
+ s->shape = shape;
620
+ s->rank = rank;
621
+ s->index_dtype = yale_index_dtype(s);
622
+
623
+ if (init_capacity < YALE_MINIMUM(s)) init_capacity = YALE_MINIMUM(s);
624
+ s->capacity = init_capacity;
625
+
626
+
627
+ s->ija = ALLOC_N( char, nm_sizeof[s->index_dtype] * init_capacity );
628
+ s->a = ALLOC_N( char, nm_sizeof[s->dtype] * init_capacity );
629
+
630
+ return s;
631
+ }
632
+
633
+
634
+ // Empty the matrix
635
+ void init_yale_storage(YALE_STORAGE* s) {
636
+ y_size_t IA_INIT = YALE_IA_SIZE(s)+1, i;
637
+
638
+ // clear out IJA vector
639
+ for (i = 0; i < YALE_IA_SIZE(s)+1; ++i)
640
+ SetFuncs[s->index_dtype][Y_SIZE_T](1, (char*)(s->ija) + i*nm_sizeof[s->index_dtype], 0, &IA_INIT, 0); // set initial values for IJA
641
+
642
+ clear_diagonal_and_zero(s);
643
+ }
644
+
645
+
646
+ char yale_storage_set_diagonal(YALE_STORAGE* s, y_size_t i, void* v) {
647
+ memcpy(YALE_DIAG(s, nm_sizeof[s->dtype], i), v, nm_sizeof[s->dtype]); return 'r';
648
+ }
649
+
650
+
651
+ // Binary search for returning stored values
652
+ int yale_storage_binary_search(YALE_STORAGE* s, y_size_t left, y_size_t right, y_size_t key) {
653
+ y_size_t mid = (left + right)/2, mid_j;
654
+
655
+ if (left > right) return -1;
656
+
657
+ YaleGetIJA(mid_j, s, mid);
658
+ if (mid_j == key) return mid;
659
+ else if (mid_j > key) return yale_storage_binary_search(s, left, mid-1, key);
660
+ else return yale_storage_binary_search(s, mid+1, right, key);
661
+ }
662
+
663
+
664
+
665
+
666
// Set the element at coords (row, col) to the value at v.
// Diagonal writes go straight into D in O(1). Off-diagonal writes either
// replace an existing stored entry (returns 'r') or insert a new one
// (returns 'i'), shifting IJA/A right, bumping later row pointers, and
// incrementing ndnz.
char yale_storage_set(YALE_STORAGE* s, size_t* coords, void* v) {
  y_size_t i_next = coords[0] + 1;
  y_size_t ija, ija_next, ija_size;
  y_size_t pos;
  bool found = false;
  char ins_type;

  if (coords[0] == coords[1]) return yale_storage_set_diagonal(s, coords[0], v);

  // Get IJA positions of the beginning and end of the row
  YaleGetIJA(ija, s, coords[0]);
  YaleGetIJA(ija_next, s, i_next);

  if (ija == ija_next) { // empty row
    ins_type = yale_vector_insert(s, ija, &(coords[1]), v, 1, false);
    yale_storage_increment_ia_after(s, YALE_IA_SIZE(s), coords[0], 1);
    s->ndnz++;
    return ins_type;
  }

  // non-empty row. search for coords[1] in the IJA array, between ija and ija_next
  // (including ija, not including ija_next)
  // NOTE(review): ija_size is computed here but never used afterward.
  YaleGetSize(ija_size, s);

  // Do a binary search for the column
  pos = yale_storage_insert_search(s, ija, ija_next-1, coords[1], &found);

  // Column already stored: overwrite in place.
  if (found) return yale_vector_replace(s, pos, &(coords[1]), v, 1);

  // Otherwise insert at the returned insertion point and fix up row pointers.
  ins_type = yale_vector_insert(s, pos, &(coords[1]), v, 1, false);
  yale_storage_increment_ia_after(s, YALE_IA_SIZE(s), coords[0], 1);
  s->ndnz++;
  return ins_type;

}
702
+
703
+
704
// Return a pointer to the element at coords (row, col). Never returns NULL:
// when (row, col) is not stored, a pointer to the shared zero slot at
// A[shape[0]] is returned, which callers may safely dereference (but must
// not write through). The pointer aliases storage memory — do not free.
void* yale_storage_ref(YALE_STORAGE* s, size_t* coords) {
  y_size_t l, r, i_plus_one = coords[0] + 1, test_j;
  int pos;

  // Diagonal lookups are O(1) via the D portion of A.
  if (coords[0] == coords[1]) return YALE_DIAG(s,nm_sizeof[s->dtype],coords[0]);

  // Bounds of the row's stored (non-diagonal) entries in IJA/A.
  YaleGetIJA(l, s, coords[0]);
  YaleGetIJA(r, s, i_plus_one);

  if (l == r) return YALE_A(s, nm_sizeof[s->dtype], s->shape[0]); // empty row: return zero pointer

  pos = yale_storage_binary_search(s, l, r-1, coords[1]); // binary search for the column's location
  if (pos != -1) {
    YaleGetIJA(test_j, s, pos);
    if (test_j == coords[1])
      return YALE_A(s, nm_sizeof[s->dtype], pos); // Found exact value.
  }

  // return a pointer that happens to be zero
  return YALE_A(s, nm_sizeof[s->dtype], s->shape[0]);
}
725
+
726
+ #endif