nmatrix 0.0.6 → 0.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -0
  3. data/Gemfile +5 -0
  4. data/History.txt +97 -0
  5. data/Manifest.txt +34 -7
  6. data/README.rdoc +13 -13
  7. data/Rakefile +36 -26
  8. data/ext/nmatrix/data/data.cpp +15 -2
  9. data/ext/nmatrix/data/data.h +4 -0
  10. data/ext/nmatrix/data/ruby_object.h +5 -14
  11. data/ext/nmatrix/extconf.rb +3 -2
  12. data/ext/nmatrix/{util/math.cpp → math.cpp} +296 -6
  13. data/ext/nmatrix/math/asum.h +143 -0
  14. data/ext/nmatrix/math/geev.h +82 -0
  15. data/ext/nmatrix/math/gemm.h +267 -0
  16. data/ext/nmatrix/math/gemv.h +208 -0
  17. data/ext/nmatrix/math/ger.h +96 -0
  18. data/ext/nmatrix/math/gesdd.h +80 -0
  19. data/ext/nmatrix/math/gesvd.h +78 -0
  20. data/ext/nmatrix/math/getf2.h +86 -0
  21. data/ext/nmatrix/math/getrf.h +240 -0
  22. data/ext/nmatrix/math/getri.h +107 -0
  23. data/ext/nmatrix/math/getrs.h +125 -0
  24. data/ext/nmatrix/math/idamax.h +86 -0
  25. data/ext/nmatrix/{util → math}/lapack.h +60 -356
  26. data/ext/nmatrix/math/laswp.h +165 -0
  27. data/ext/nmatrix/math/long_dtype.h +52 -0
  28. data/ext/nmatrix/math/math.h +1154 -0
  29. data/ext/nmatrix/math/nrm2.h +181 -0
  30. data/ext/nmatrix/math/potrs.h +125 -0
  31. data/ext/nmatrix/math/rot.h +141 -0
  32. data/ext/nmatrix/math/rotg.h +115 -0
  33. data/ext/nmatrix/math/scal.h +73 -0
  34. data/ext/nmatrix/math/swap.h +73 -0
  35. data/ext/nmatrix/math/trsm.h +383 -0
  36. data/ext/nmatrix/nmatrix.cpp +176 -152
  37. data/ext/nmatrix/nmatrix.h +1 -2
  38. data/ext/nmatrix/ruby_constants.cpp +9 -4
  39. data/ext/nmatrix/ruby_constants.h +1 -0
  40. data/ext/nmatrix/storage/dense.cpp +57 -41
  41. data/ext/nmatrix/storage/list.cpp +52 -50
  42. data/ext/nmatrix/storage/storage.cpp +59 -43
  43. data/ext/nmatrix/storage/yale.cpp +352 -333
  44. data/ext/nmatrix/storage/yale.h +4 -0
  45. data/lib/nmatrix.rb +2 -2
  46. data/lib/nmatrix/blas.rb +4 -4
  47. data/lib/nmatrix/enumerate.rb +241 -0
  48. data/lib/nmatrix/lapack.rb +54 -1
  49. data/lib/nmatrix/math.rb +462 -0
  50. data/lib/nmatrix/nmatrix.rb +210 -486
  51. data/lib/nmatrix/nvector.rb +0 -62
  52. data/lib/nmatrix/rspec.rb +75 -0
  53. data/lib/nmatrix/shortcuts.rb +136 -108
  54. data/lib/nmatrix/version.rb +1 -1
  55. data/spec/blas_spec.rb +20 -12
  56. data/spec/elementwise_spec.rb +22 -13
  57. data/spec/io_spec.rb +1 -0
  58. data/spec/lapack_spec.rb +197 -0
  59. data/spec/nmatrix_spec.rb +39 -38
  60. data/spec/nvector_spec.rb +3 -9
  61. data/spec/rspec_monkeys.rb +29 -0
  62. data/spec/rspec_spec.rb +34 -0
  63. data/spec/shortcuts_spec.rb +14 -16
  64. data/spec/slice_spec.rb +242 -186
  65. data/spec/spec_helper.rb +19 -0
  66. metadata +33 -5
  67. data/ext/nmatrix/util/math.h +0 -2612
@@ -139,7 +139,8 @@ DENSE_STORAGE* create_from_yale_storage(const YALE_STORAGE* rhs, dtype_t l_dtype
139
139
 
140
140
  // Allocate and set shape.
141
141
  size_t* shape = ALLOC_N(size_t, rhs->dim);
142
- memcpy(shape, rhs->shape, rhs->dim * sizeof(size_t));
142
+ shape[0] = rhs->shape[0];
143
+ shape[1] = rhs->shape[1];
143
144
 
144
145
  DENSE_STORAGE* lhs = nm_dense_storage_create(l_dtype, shape, rhs->dim, NULL, 0);
145
146
  LDType* lhs_elements = reinterpret_cast<LDType*>(lhs->elements);
@@ -147,45 +148,49 @@ DENSE_STORAGE* create_from_yale_storage(const YALE_STORAGE* rhs, dtype_t l_dtype
147
148
  // Position in dense to write to.
148
149
  size_t pos = 0;
149
150
 
150
- LDType LCAST_ZERO = rhs_a[rhs->shape[0]];
151
+ LDType LCAST_ZERO = rhs_a[rhs->src->shape[0]];
151
152
 
152
153
  // Walk through rows. For each entry we set in dense, increment pos.
153
- for (RIType i = 0; i < rhs->shape[0]; ++i) {
154
-
155
- // Position in yale array
156
- RIType ija = rhs_ija[i];
154
+ for (size_t i = 0; i < shape[0]; ++i) {
155
+ RIType ri = i + rhs->offset[0];
157
156
 
158
- if (ija == rhs_ija[i+1]) { // Check boundaries of row: is row empty?
157
+ if (rhs_ija[ri] == rhs_ija[ri+1]) { // Check boundaries of row: is row empty? (Yes.)
159
158
 
160
159
  // Write zeros in each column.
161
- for (RIType j = 0; j < rhs->shape[1]; ++j) { // Move to next dense position.
160
+ for (size_t j = 0; j < shape[1]; ++j) { // Move to next dense position.
162
161
 
163
- // Fill in zeros (except for diagonal)
164
- if (i == j) lhs_elements[pos] = static_cast<LDType>(rhs_a[i]);
165
- else lhs_elements[pos] = LCAST_ZERO;
162
+ // Fill in zeros and copy the diagonal entry for this empty row.
163
+ if (ri == j + rhs->offset[1]) lhs_elements[pos] = static_cast<LDType>(rhs_a[ri]);
164
+ else lhs_elements[pos] = LCAST_ZERO;
166
165
 
167
166
  ++pos;
168
167
  }
169
168
 
170
- } else {
171
- // Row contains entries: write those in each column, interspersed with zeros.
172
- RIType jj = rhs_ija[ija];
169
+ } else { // Row contains entries: write those in each column, interspersed with zeros.
170
+
171
+ // Get the first ija position of the row (as sliced)
172
+ RIType ija = nm::yale_storage::binary_search_left_boundary<RIType>(rhs, rhs_ija[ri], rhs_ija[ri+1]-1, rhs->offset[1]);
173
+
174
+ // What column is it?
175
+ RIType next_stored_rj = rhs_ija[ija];
173
176
 
174
- for (size_t j = 0; j < rhs->shape[1]; ++j) {
175
- if (i == j) {
176
- lhs_elements[pos] = static_cast<LDType>(rhs_a[i]);
177
+ for (size_t j = 0; j < shape[1]; ++j) {
178
+ RIType rj = j + rhs->offset[1];
177
179
 
178
- } else if (j == jj) {
180
+ if (rj == ri) { // at a diagonal in RHS
181
+ lhs_elements[pos] = static_cast<LDType>(rhs_a[ri]);
182
+
183
+ } else if (rj == next_stored_rj) { // column ID was found in RHS
179
184
  lhs_elements[pos] = static_cast<LDType>(rhs_a[ija]); // Copy from rhs.
180
185
 
181
186
  // Get next.
182
187
  ++ija;
183
188
 
184
189
  // Increment to next column ID (or go off the end).
185
- if (ija < rhs_ija[i+1]) jj = rhs_ija[ija];
186
- else jj = rhs->shape[1];
190
+ if (ija < rhs_ija[ri+1]) next_stored_rj = rhs_ija[ija];
191
+ else next_stored_rj = rhs->src->shape[1];
187
192
 
188
- } else { // j < jj
193
+ } else { // rj < next_stored_rj
189
194
 
190
195
  // Insert zero.
191
196
  lhs_elements[pos] = LCAST_ZERO;
@@ -319,7 +324,7 @@ LIST_STORAGE* create_from_yale_storage(const YALE_STORAGE* rhs, dtype_t l_dtype)
319
324
  shape[0] = rhs->shape[0]; shape[1] = rhs->shape[1];
320
325
 
321
326
  RDType* rhs_a = reinterpret_cast<RDType*>(rhs->a);
322
- RDType R_ZERO = rhs_a[ rhs->shape[0] ];
327
+ RDType R_ZERO = rhs_a[ rhs->src->shape[0] ];
323
328
 
324
329
  // copy default value from the zero location in the Yale matrix
325
330
  LDType* default_val = ALLOC_N(LDType, 1);
@@ -333,35 +338,43 @@ LIST_STORAGE* create_from_yale_storage(const YALE_STORAGE* rhs, dtype_t l_dtype)
333
338
 
334
339
  NODE *last_row_added = NULL;
335
340
  // Walk through rows and columns as if RHS were a dense matrix
336
- for (RIType i = 0; i < rhs->shape[0]; ++i) {
341
+ for (RIType i = 0; i < shape[0]; ++i) {
342
+ RIType ri = i + rhs->offset[0];
343
+
337
344
  NODE *last_added = NULL;
338
345
 
339
346
  // Get boundaries of beginning and end of row
340
- RIType ija = rhs_ija[i],
341
- ija_next = rhs_ija[i+1];
347
+ RIType ija = rhs_ija[ri],
348
+ ija_next = rhs_ija[ri+1];
342
349
 
343
350
  // Are we going to need to add a diagonal for this row?
344
351
  bool add_diag = false;
345
- if (rhs_a[i] != R_ZERO) add_diag = true;
352
+ if (rhs_a[ri] != R_ZERO) add_diag = true; // non-zero and located within the bounds of the slice
346
353
 
347
354
  if (ija < ija_next || add_diag) {
355
+ ija = nm::yale_storage::binary_search_left_boundary<RIType>(rhs, ija, ija_next-1, rhs->offset[1]);
348
356
 
349
357
  LIST* curr_row = list::create();
350
358
 
351
359
  LDType* insert_val;
352
360
 
353
361
  while (ija < ija_next) {
354
- RIType jj = rhs_ija[ija]; // what column number is this?
362
+ // Find first column in slice
363
+ RIType rj = rhs_ija[ija];
364
+ RIType j = rj - rhs->offset[1];
355
365
 
356
366
  // Is there a nonzero diagonal item between the previously added item and the current one?
357
- if (jj > i && add_diag) {
367
+ if (rj > ri && add_diag) {
358
368
  // Allocate and copy insertion value
359
369
  insert_val = ALLOC_N(LDType, 1);
360
- *insert_val = static_cast<LDType>(rhs_a[i]);
370
+ *insert_val = static_cast<LDType>(rhs_a[ri]);
361
371
 
362
- // insert the item in the list at the appropriate location
363
- if (last_added) last_added = list::insert_after(last_added, i, insert_val);
364
- else last_added = list::insert(curr_row, false, i, insert_val);
372
+ // Insert the item in the list at the appropriate location.
373
+ // What is the appropriate key? Well, it's definitely right(i)==right(j), but the
374
+ // rj index has already been advanced past ri. So we should treat ri as the column and
375
+ // subtract offset[1].
376
+ if (last_added) last_added = list::insert_after(last_added, ri - rhs->offset[1], insert_val);
377
+ else last_added = list::insert(curr_row, false, ri - rhs->offset[1], insert_val);
365
378
 
366
379
  // don't add again!
367
380
  add_diag = false;
@@ -371,20 +384,23 @@ LIST_STORAGE* create_from_yale_storage(const YALE_STORAGE* rhs, dtype_t l_dtype)
371
384
  insert_val = ALLOC_N(LDType, 1);
372
385
  *insert_val = static_cast<LDType>(rhs_a[ija]);
373
386
 
374
- if (last_added) last_added = list::insert_after(last_added, jj, insert_val);
375
- else last_added = list::insert(curr_row, false, jj, insert_val);
387
+ if (last_added) last_added = list::insert_after(last_added, j, insert_val);
388
+ else last_added = list::insert(curr_row, false, j, insert_val);
376
389
 
377
390
  ++ija; // move to next entry in Yale matrix
378
391
  }
379
392
 
380
393
  if (add_diag) {
394
+
381
395
  // still haven't added the diagonal.
382
396
  insert_val = ALLOC_N(LDType, 1);
383
- *insert_val = static_cast<LDType>(rhs_a[i]);
397
+ *insert_val = static_cast<LDType>(rhs_a[ri]);
384
398
 
385
399
  // insert the item in the list at the appropriate location
386
- if (last_added) last_added = list::insert_after(last_added, i, insert_val);
387
- else last_added = list::insert(curr_row, false, i, insert_val);
400
+ if (last_added) last_added = list::insert_after(last_added, ri - rhs->offset[1], insert_val);
401
+ else last_added = list::insert(curr_row, false, ri - rhs->offset[1], insert_val);
402
+
403
+ // no need to set add_diag to false because it'll be reset automatically in next iteration.
388
404
  }
389
405
 
390
406
  // Now add the list at the appropriate location
@@ -458,6 +474,7 @@ namespace yale_storage { // FIXME: Move to yale.cpp
458
474
  */
459
475
  template <typename LDType, typename RDType, typename LIType>
460
476
  YALE_STORAGE* create_from_dense_storage(const DENSE_STORAGE* rhs, dtype_t l_dtype, void* init) {
477
+
461
478
  if (rhs->dim != 2) rb_raise(nm_eStorageTypeError, "can only convert matrices of dim 2 to yale");
462
479
 
463
480
  LIType pos = 0;
@@ -504,16 +521,15 @@ namespace yale_storage { // FIXME: Move to yale.cpp
504
521
 
505
522
  // Start just after the zero position.
506
523
  LIType ija = shape[0]+1;
507
- LIType i;
508
524
  pos = 0;
509
525
 
510
526
  // Copy contents
511
- for (i = 0; i < rhs->shape[0]; ++i) {
527
+ for (LIType i = 0; i < rhs->shape[0]; ++i) {
512
528
  // indicate the beginning of a row in the IJA array
513
- lhs_ija[i]= ija;
529
+ lhs_ija[i] = ija;
514
530
 
515
531
  for (LIType j = 0; j < rhs->shape[1]; ++j) {
516
- pos = rhs->stride[0]*(i + rhs->offset[0]) + rhs->stride[1]*(j + rhs->offset[1]); // calc position with offsets
532
+ pos = rhs->stride[0] * (i + rhs->offset[0]) + rhs->stride[1] * (j + rhs->offset[1]); // calc position with offsets
517
533
 
518
534
  if (i == j) { // copy to diagonal
519
535
  lhs_a[i] = static_cast<LDType>(rhs_elements[pos]);
@@ -570,13 +586,13 @@ namespace yale_storage { // FIXME: Move to yale.cpp
570
586
  // Copy contents
571
587
  for (NODE* i_curr = rhs->rows->first; i_curr; i_curr = i_curr->next) {
572
588
 
573
- // Shrink refernce
589
+ // Shrink reference
574
590
  int i = i_curr->key - rhs->offset[0];
575
591
  if (i < 0 || i >= (int)rhs->shape[0]) continue;
576
592
 
577
593
  for (NODE* j_curr = ((LIST*)(i_curr->val))->first; j_curr; j_curr = j_curr->next) {
578
594
 
579
- // Shrink refernce
595
+ // Shrink reference
580
596
  int j = j_curr->key - rhs->offset[1];
581
597
  if (j < 0 || j >= (int)rhs->shape[1]) continue;
582
598
 
@@ -44,6 +44,7 @@
44
44
  #include <cstdio> // std::fprintf
45
45
  #include <iostream>
46
46
  #include <array>
47
+ #include <typeinfo>
47
48
 
48
49
  #define RB_P(OBJ) \
49
50
  rb_funcall(rb_stderr, rb_intern("print"), 1, rb_funcall(OBJ, rb_intern("object_id"), 0)); \
@@ -54,9 +55,8 @@
54
55
  */
55
56
 
56
57
  // #include "types.h"
57
- #include "util/math.h"
58
-
59
58
  #include "data/data.h"
59
+ #include "math/math.h"
60
60
 
61
61
  #include "common.h"
62
62
  #include "yale.h"
@@ -86,8 +86,11 @@ extern "C" {
86
86
  static YALE_STORAGE* nm_copy_alloc_struct(const YALE_STORAGE* rhs, const nm::dtype_t new_dtype, const size_t new_capacity, const size_t new_size);
87
87
  static YALE_STORAGE* alloc(nm::dtype_t dtype, size_t* shape, size_t dim, nm::itype_t min_itype);
88
88
 
89
+ static size_t yale_count_slice_copy_ndnz(const YALE_STORAGE* s, size_t*, size_t*);
90
+
89
91
  static void* default_value_ptr(const YALE_STORAGE* s);
90
92
  static VALUE default_value(const YALE_STORAGE* s);
93
+ static VALUE obj_at(YALE_STORAGE* s, size_t k);
91
94
 
92
95
  /* Ruby-accessible functions */
93
96
  static VALUE nm_size(VALUE self);
@@ -100,6 +103,9 @@ extern "C" {
100
103
 
101
104
  static VALUE nm_nd_row(int argc, VALUE* argv, VALUE self);
102
105
 
106
+ static inline size_t src_ndnz(const YALE_STORAGE* s) {
107
+ return reinterpret_cast<YALE_STORAGE*>(s->src)->ndnz;
108
+ }
103
109
 
104
110
  } // end extern "C" block
105
111
 
@@ -241,72 +247,6 @@ YALE_STORAGE* create_from_old_yale(dtype_t dtype, size_t* shape, void* r_ia, voi
241
247
  }
242
248
 
243
249
 
244
- /*
245
- * Take two Yale storages and merge them into a new Yale storage.
246
- *
247
- * Uses the left as a template for the creation of a new one.
248
- */
249
- template <typename DType, typename IType>
250
- YALE_STORAGE* create_merged__(const YALE_STORAGE* left, const YALE_STORAGE* right) {
251
- char ins_type;
252
-
253
- size_t size = get_size<IType>(left);
254
-
255
- // s represents the resulting storage
256
- YALE_STORAGE* s = copy_alloc_struct<IType>(left, left->dtype, NM_MAX(left->capacity, right->capacity), size);
257
-
258
- IType* sija = reinterpret_cast<IType*>(s->ija);
259
- IType* rija = reinterpret_cast<IType*>(right->ija);
260
-
261
- // set the element between D and LU (the boundary in A), which should be 0.
262
- reinterpret_cast<DType*>(s->a)[s->shape[0]] = reinterpret_cast<DType*>(left->a)[left->shape[0]];
263
-
264
- if (right && right != left) {
265
- // some operations are unary and don't need this; others are x+x and don't need this
266
-
267
- for (IType i = 0; i < s->shape[0]; ++i) {
268
-
269
- IType ija = sija[i];
270
- IType ija_next = sija[i+1];
271
-
272
- for (IType r_ija = rija[i]; r_ija < rija[i+1]; ++r_ija) {
273
-
274
- size_t ja = sija[ija]; // insert expects a size_t
275
-
276
- if (ija == ija_next) {
277
- // destination row is empty
278
- ins_type = vector_insert<DType,IType>(s, ija, &ja, NULL, 1, true);
279
- increment_ia_after<IType>(s, s->shape[0], i, 1);
280
- ++(s->ndnz);
281
- ++ija;
282
-
283
- if (ins_type == 'i') ++ija_next;
284
-
285
- } else {
286
- bool found;
287
-
288
- // merge positions into destination row
289
- IType pos = insert_search<IType>(s, ija, ija_next-1, sija[ija], &found);
290
-
291
- if (!found) {
292
- vector_insert<DType,IType>(s, pos, &ja, NULL, 1, true);
293
- increment_ia_after<IType>(s, s->shape[0], i, 1);
294
- ++(s->ndnz);
295
-
296
- if (ins_type == 'i') ++ija_next;
297
- }
298
-
299
- // can now set a left boundary for the next search
300
- ija = pos + 1;
301
- }
302
- }
303
- }
304
- }
305
-
306
- return s;
307
- }
308
-
309
-
310
250
  /*
311
251
  * Empty the matrix by initializing the IJA vector and setting the diagonal to 0.
312
252
  *
@@ -332,29 +272,27 @@ size_t max_size(YALE_STORAGE* s) {
332
272
 
333
273
  return result;
334
274
  }
275
+
276
+
335
277
  ///////////////
336
278
  // Accessors //
337
279
  ///////////////
338
280
 
281
+
339
282
  /*
340
- * Returns a slice of YALE_STORAGE object by copy
283
+ * Determine the number of non-diagonal non-zeros in a not-yet-created copy of a slice or matrix.
341
284
  */
342
- template <typename DType,typename IType>
343
- void* get(YALE_STORAGE* storage, SLICE* slice) {
344
-
345
- size_t *offset = slice->coords;
346
- // Copy shape for yale construction
347
- size_t* shape = ALLOC_N(size_t, 2);
348
- shape[0] = slice->lengths[0];
349
- shape[1] = slice->lengths[1];
285
+ template <typename DType, typename IType>
286
+ static size_t count_slice_copy_ndnz(const YALE_STORAGE* s, size_t* offset, size_t* shape) {
287
+ IType* ija = reinterpret_cast<IType*>(s->ija);
288
+ DType* a = reinterpret_cast<DType*>(s->a);
350
289
 
351
- IType* src_ija = reinterpret_cast<IType*>(storage->ija);
352
- DType* src_a = reinterpret_cast<DType*>(storage->a);
290
+ DType ZERO(*reinterpret_cast<DType*>(default_value_ptr(s)));
353
291
 
354
292
  // Calc ndnz for the destination
355
293
  size_t ndnz = 0;
356
- size_t i,j; // indexes of destination matrix
357
- size_t k,l; // indexes of source matrix
294
+ size_t i, j; // indexes of destination matrix
295
+ size_t k, l; // indexes of source matrix
358
296
  for (i = 0; i < shape[0]; i++) {
359
297
  k = i + offset[0];
360
298
  for (j = 0; j < shape[1]; j++) {
@@ -363,42 +301,54 @@ void* get(YALE_STORAGE* storage, SLICE* slice) {
363
301
  if (j == i) continue;
364
302
 
365
303
  if (k == l) { // for diagonal element of source
366
- if (src_a[k] != 0) ++ndnz;
304
+ if (a[k] != ZERO) ++ndnz;
367
305
  } else { // for non-diagonal element
368
- for (size_t c = src_ija[k]; c < src_ija[k+1]; c++) {
369
- if (src_ija[c] == l) {
306
+ for (size_t c = ija[k]; c < ija[k+1]; c++) {
307
+ if (ija[c] == l) {
370
308
  ++ndnz;
371
309
  break;
372
310
  }
373
311
  }
374
312
  }
375
-
376
313
  }
377
314
  }
378
315
 
379
- size_t request_capacity = shape[0] + ndnz + 1;
380
- //fprintf(stderr, "yale get copy: shape0=%d, shape1=%d, ndnz=%d, request_capacity=%d\n", shape[0], shape[1], ndnz, request_capacity);
381
- YALE_STORAGE* ns = nm_yale_storage_create(storage->dtype, shape, 2, request_capacity, storage->itype);
316
+ return ndnz;
317
+ }
318
+
319
+
320
+
321
+ /*
322
+ * Copy some portion of a matrix into a new matrix.
323
+ */
324
+ template <typename LDType, typename RDType, typename IType>
325
+ static void slice_copy(YALE_STORAGE* ns, const YALE_STORAGE* s, size_t* offset, size_t* lengths, dtype_t new_dtype) {
326
+
327
+ IType* src_ija = reinterpret_cast<IType*>(s->ija);
328
+ RDType* src_a = reinterpret_cast<RDType*>(s->a);
382
329
 
383
- if (ns->capacity < request_capacity)
384
- rb_raise(nm_eStorageTypeError, "conversion failed; capacity of %ld requested, max allowable is %ld", request_capacity, ns->capacity);
330
+ RDType RZERO(*reinterpret_cast<RDType*>(default_value_ptr(s)));
385
331
 
386
332
  // Initialize the A and IJA arrays
387
- init<DType,IType>(ns, default_value_ptr(storage));
388
- IType* dst_ija = reinterpret_cast<IType*>(ns->ija);
389
- DType* dst_a = reinterpret_cast<DType*>(ns->a);
390
-
391
- size_t ija = shape[0] + 1;
392
- DType val = src_a[storage->shape[0]]; // use 0 as the default for copy
393
- for (i = 0; i < shape[0]; ++i) {
333
+ LDType val(RZERO); // need default value for init. Can't use ns default value because it's not initialized yet
334
+ init<LDType,IType>(ns, &val);
335
+ IType* dst_ija = reinterpret_cast<IType*>(ns->ija);
336
+ LDType* dst_a = reinterpret_cast<LDType*>(ns->a);
337
+
338
+ size_t ija = lengths[0] + 1;
339
+
340
+ size_t i, j; // indexes of destination matrix
341
+ size_t k, l; // indexes of source matrix
342
+
343
+ for (i = 0; i < lengths[0]; ++i) {
394
344
  k = i + offset[0];
395
- for (j = 0; j < shape[1]; ++j) {
345
+ for (j = 0; j < lengths[1]; ++j) {
396
346
  bool found = false;
397
347
  l = j + offset[1];
398
-
348
+
399
349
  // Get value from source matrix
400
350
  if (k == l) { // source diagonal
401
- if (src_a[k] != 0) { // don't bother copying non-zero values from the diagonal
351
+ if (src_a[k] != RZERO) { // don't bother copying non-zero values from the diagonal
402
352
  val = src_a[k];
403
353
  found = true;
404
354
  }
@@ -420,9 +370,8 @@ void* get(YALE_STORAGE* storage, SLICE* slice) {
420
370
  // copy non-diagonal element
421
371
  dst_ija[ija] = j;
422
372
  dst_a[ija] = val;
423
-
424
373
  ++ija;
425
- for (size_t c = i + 1; c <= shape[0]; ++c) {
374
+ for (size_t c = i + 1; c <= lengths[0]; ++c) {
426
375
  dst_ija[c] = ija;
427
376
  }
428
377
  }
@@ -430,39 +379,71 @@ void* get(YALE_STORAGE* storage, SLICE* slice) {
430
379
  }
431
380
  }
432
381
 
433
- dst_ija[shape[0]] = ija; // indicate the end of the last row
434
- ns->ndnz = ndnz;
435
- return ns;
382
+ dst_ija[lengths[0]] = ija; // indicate the end of the last row
383
+ ns->ndnz = ija - lengths[0] - 1; // update ndnz count
384
+ }
385
+
386
+
387
+ /*
388
+ * Get a single element of a yale storage object
389
+ */
390
+ template <typename DType, typename IType>
391
+ static void* get_single(YALE_STORAGE* storage, SLICE* slice) {
392
+
393
+ DType* a = reinterpret_cast<DType*>(storage->a);
394
+ IType* ija = reinterpret_cast<IType*>(storage->ija);
395
+
396
+ size_t coord0 = storage->offset[0] + slice->coords[0];
397
+ size_t coord1 = storage->offset[1] + slice->coords[1];
398
+
399
+ if (coord0 == coord1)
400
+ return &(a[ coord0 ]); // return diagonal entry
401
+
402
+ if (ija[coord0] == ija[coord0+1])
403
+ return &(a[ storage->src->shape[0] ]); // return zero pointer
404
+
405
+ // binary search for the column's location
406
+ int pos = binary_search<IType>(storage, ija[coord0], ija[coord0+1]-1, coord1);
407
+
408
+ if (pos != -1 && ija[pos] == coord1)
409
+ return &(a[pos]); // found exact value
410
+
411
+ return &(a[ storage->src->shape[0] ]); // return a pointer that happens to be zero
436
412
  }
413
+
414
+
437
415
  /*
438
416
  * Returns a pointer to the correct location in the A vector of a YALE_STORAGE object, given some set of coordinates
439
417
  * (the coordinates are stored in slice).
440
418
  */
441
419
  template <typename DType,typename IType>
442
- void* ref(YALE_STORAGE* storage, SLICE* slice) {
443
- size_t* coords = slice->coords;
420
+ void* ref(YALE_STORAGE* s, SLICE* slice) {
444
421
 
445
- if (!slice->single) rb_raise(rb_eNotImpError, "This type slicing not supported yet.");
422
+ YALE_STORAGE* ns = ALLOC( YALE_STORAGE );
446
423
 
447
- DType* a = reinterpret_cast<DType*>(storage->a);
448
- IType* ija = reinterpret_cast<IType*>(storage->ija);
424
+ ns->dim = s->dim;
425
+ ns->offset = ALLOC_N(size_t, ns->dim);
426
+ ns->shape = ALLOC_N(size_t, ns->dim);
449
427
 
450
- if (coords[0] == coords[1])
451
- return &(a[ coords[0] ]); // return diagonal entry
428
+ for (size_t i = 0; i < ns->dim; ++i) {
429
+ ns->offset[i] = slice->coords[i] + s->offset[i];
430
+ ns->shape[i] = slice->lengths[i];
431
+ }
452
432
 
453
- if (ija[coords[0]] == ija[coords[0]+1])
454
- return &(a[ storage->shape[0] ]); // return zero pointer
433
+ ns->dtype = s->dtype;
434
+ ns->itype = s->itype; // or should we go by shape?
455
435
 
456
- // binary search for the column's location
457
- int pos = binary_search<IType>(storage,
458
- ija[coords[0]],
459
- ija[coords[0]+1]-1,
460
- coords[1]);
436
+ ns->a = s->a;
437
+ ns->ija = s->ija;
461
438
 
462
- if (pos != -1 && ija[pos] == coords[1])
463
- return &(a[pos]); // found exact value
439
+ ns->src = s->src;
440
+ s->src->count++;
441
+
442
+ ns->ndnz = 0;
443
+ ns->capacity= 0;
444
+
445
+ return ns;
464
446
 
465
- return &(a[ storage->shape[0] ]); // return a pointer that happens to be zero
466
447
  }
467
448
 
468
449
  /*
@@ -472,22 +453,23 @@ void* ref(YALE_STORAGE* storage, SLICE* slice) {
472
453
  template <typename DType, typename IType>
473
454
  char set(YALE_STORAGE* storage, SLICE* slice, void* value) {
474
455
  DType* v = reinterpret_cast<DType*>(value);
475
- size_t* coords = slice->coords;
456
+ size_t coord0 = storage->offset[0] + slice->coords[0],
457
+ coord1 = storage->offset[1] + slice->coords[1];
476
458
 
477
459
  bool found = false;
478
460
  char ins_type;
479
461
 
480
- if (coords[0] == coords[1]) {
481
- reinterpret_cast<DType*>(storage->a)[coords[0]] = *v; // set diagonal
462
+ if (coord0 == coord1) {
463
+ reinterpret_cast<DType*>(storage->a)[coord0] = *v; // set diagonal
482
464
  return 'r';
483
465
  }
484
466
 
485
467
  // Get IJA positions of the beginning and end of the row
486
- if (reinterpret_cast<IType*>(storage->ija)[coords[0]] == reinterpret_cast<IType*>(storage->ija)[coords[0]+1]) {
468
+ if (reinterpret_cast<IType*>(storage->ija)[coord0] == reinterpret_cast<IType*>(storage->ija)[coord0+1]) {
487
469
  // empty row
488
- ins_type = vector_insert<DType,IType>(storage, reinterpret_cast<IType*>(storage->ija)[coords[0]], &(coords[1]), v, 1, false);
489
- increment_ia_after<IType>(storage, storage->shape[0], coords[0], 1);
490
- storage->ndnz++;
470
+ ins_type = vector_insert<DType,IType>(storage, reinterpret_cast<IType*>(storage->ija)[coord0], &(coord1), v, 1, false);
471
+ increment_ia_after<IType>(storage, storage->shape[0], coord0, 1);
472
+ reinterpret_cast<YALE_STORAGE*>(storage->src)->ndnz++;
491
473
 
492
474
  return ins_type;
493
475
  }
@@ -498,19 +480,19 @@ char set(YALE_STORAGE* storage, SLICE* slice, void* value) {
498
480
 
499
481
  // Do a binary search for the column
500
482
  size_t pos = insert_search<IType>(storage,
501
- reinterpret_cast<IType*>(storage->ija)[coords[0]],
502
- reinterpret_cast<IType*>(storage->ija)[coords[0]+1]-1,
503
- coords[1], &found);
483
+ reinterpret_cast<IType*>(storage->ija)[coord0],
484
+ reinterpret_cast<IType*>(storage->ija)[coord0+1]-1,
485
+ coord1, &found);
504
486
 
505
487
  if (found) { // replace
506
- reinterpret_cast<IType*>(storage->ija)[pos] = coords[1];
488
+ reinterpret_cast<IType*>(storage->ija)[pos] = coord1;
507
489
  reinterpret_cast<DType*>(storage->a)[pos] = *v;
508
490
  return 'r';
509
491
  }
510
492
 
511
- ins_type = vector_insert<DType,IType>(storage, pos, &(coords[1]), v, 1, false);
512
- increment_ia_after<IType>(storage, storage->shape[0], coords[0], 1);
513
- storage->ndnz++;
493
+ ins_type = vector_insert<DType,IType>(storage, pos, &(coord1), v, 1, false);
494
+ increment_ia_after<IType>(storage, storage->shape[0], coord0, 1);
495
+ reinterpret_cast<YALE_STORAGE*>(storage->src)->ndnz++;
514
496
 
515
497
  return ins_type;
516
498
  }
@@ -682,6 +664,31 @@ static bool ndrow_is_empty(const YALE_STORAGE* s, IType ija, const IType ija_nex
682
664
  // Utility //
683
665
  /////////////
684
666
 
667
+
668
+ /*
669
+ * Binary search for finding the beginning of a slice. Returns the position of the first element which is larger than
670
+ * bound.
671
+ */
672
+ template <typename IType>
673
+ IType binary_search_left_boundary(const YALE_STORAGE* s, IType left, IType right, IType bound) {
674
+ if (left > right) return -1;
675
+
676
+ IType* ija = reinterpret_cast<IType*>(s->ija);
677
+
678
+ if (ija[left] >= bound) return left; // shortcut
679
+
680
+ IType mid = (left + right) / 2;
681
+ IType mid_j = ija[mid];
682
+
683
+ if (mid_j == bound)
684
+ return mid;
685
+ else if (mid_j > bound) { // eligible! don't exclude it.
686
+ return binary_search_left_boundary<IType>(s, left, mid, bound);
687
+ } else // (mid_j < bound)
688
+ return binary_search_left_boundary<IType>(s, mid + 1, right, bound);
689
+ }
690
+
691
+
685
692
  /*
686
693
  * Binary search for returning stored values. Returns a non-negative position, or -1 for not found.
687
694
  */
@@ -710,6 +717,8 @@ int binary_search(YALE_STORAGE* s, IType left, IType right, IType key) {
710
717
  * Resize yale storage vectors A and IJA, copying values.
711
718
  */
712
719
  static void vector_grow(YALE_STORAGE* s) {
720
+ if (s->src != s) throw; // need to correct this quickly.
721
+
713
722
  size_t new_capacity = s->capacity * GROWTH_CONSTANT;
714
723
  size_t max_capacity = max_size(s);
715
724
 
@@ -742,6 +751,8 @@ static void vector_grow(YALE_STORAGE* s) {
742
751
  */
743
752
  template <typename DType, typename IType>
744
753
  static char vector_insert_resize(YALE_STORAGE* s, size_t current_size, size_t pos, size_t* j, size_t n, bool struct_only) {
754
+ if (s != s->src) throw;
755
+
745
756
  // Determine the new capacity for the IJA and A vectors.
746
757
  size_t new_capacity = s->capacity * GROWTH_CONSTANT;
747
758
  size_t max_capacity = max_size(s);
@@ -916,15 +927,30 @@ static IType insert_search(YALE_STORAGE* s, IType left, IType right, IType key,
916
927
  template <typename LDType, typename RDType, typename IType>
917
928
  YALE_STORAGE* cast_copy(const YALE_STORAGE* rhs, dtype_t new_dtype) {
918
929
 
919
- // Allocate a new structure
920
- size_t size = get_size<IType>(rhs);
921
- YALE_STORAGE* lhs = copy_alloc_struct<IType>(rhs, new_dtype, rhs->capacity, size);
930
+ YALE_STORAGE* lhs;
922
931
 
923
- if (rhs->dtype == new_dtype) { // FIXME: Test if this condition is actually faster; second condition should work just as well.
932
+ if (rhs->src != rhs) { // copy the reference
933
+ // Copy shape for yale construction
934
+ size_t* shape = ALLOC_N(size_t, 2);
935
+ shape[0] = rhs->shape[0];
936
+ shape[1] = rhs->shape[1];
937
+ size_t ndnz = src_ndnz(rhs);
938
+ if (shape[0] != rhs->src->shape[0] || shape[1] != rhs->src->shape[1])
939
+ ndnz = count_slice_copy_ndnz<RDType,IType>(rhs, rhs->offset, rhs->shape); // expensive, avoid if possible
940
+ size_t request_capacity = shape[0] + ndnz + 1;
941
+ // FIXME: Should we use a different itype? Or same?
942
+ lhs = nm_yale_storage_create(new_dtype, shape, 2, request_capacity, rhs->itype);
924
943
 
925
- memcpy(lhs->a, rhs->a, size * DTYPE_SIZES[new_dtype]);
944
+ // This check probably isn't necessary.
945
+ if (lhs->capacity < request_capacity)
946
+ rb_raise(nm_eStorageTypeError, "conversion failed; capacity of %ld requested, max allowable is %ld", request_capacity, lhs->capacity);
926
947
 
927
- } else {
948
+ slice_copy<LDType, RDType, IType>(lhs, rhs, rhs->offset, rhs->shape, new_dtype);
949
+ } else { // regular copy
950
+
951
+ // Allocate a new structure
952
+ size_t size = get_size<IType>(rhs);
953
+ lhs = copy_alloc_struct<IType>(rhs, new_dtype, rhs->capacity, size);
928
954
 
929
955
  LDType* la = reinterpret_cast<LDType*>(lhs->a);
930
956
  RDType* ra = reinterpret_cast<RDType*>(rhs->a);
@@ -932,7 +958,6 @@ YALE_STORAGE* cast_copy(const YALE_STORAGE* rhs, dtype_t new_dtype) {
932
958
  for (size_t index = 0; index < size; ++index) {
933
959
  la[index] = ra[index];
934
960
  }
935
-
936
961
  }
937
962
 
938
963
  return lhs;
@@ -958,7 +983,10 @@ static YALE_STORAGE* copy_alloc_struct(const YALE_STORAGE* rhs, const dtype_t ne
958
983
  lhs->shape = ALLOC_N( size_t, lhs->dim );
959
984
  lhs->offset = ALLOC_N( size_t, lhs->dim );
960
985
  memcpy(lhs->shape, rhs->shape, lhs->dim * sizeof(size_t));
961
- memcpy(lhs->shape, rhs->shape, lhs->dim * sizeof(size_t));
986
+ //memcpy(lhs->offset, rhs->offset, lhs->dim * sizeof(size_t));
987
+ lhs->offset[0] = 0;
988
+ lhs->offset[1] = 0;
989
+
962
990
  lhs->itype = rhs->itype;
963
991
  lhs->capacity = new_capacity;
964
992
  lhs->dtype = new_dtype;
@@ -967,12 +995,16 @@ static YALE_STORAGE* copy_alloc_struct(const YALE_STORAGE* rhs, const dtype_t ne
967
995
  lhs->ija = ALLOC_N( IType, lhs->capacity );
968
996
  lhs->a = ALLOC_N( char, DTYPE_SIZES[new_dtype] * lhs->capacity );
969
997
  lhs->src = lhs;
998
+ lhs->count = 1;
970
999
 
971
1000
  // Now copy the contents -- but only within the boundaries set by the size. Leave
972
1001
  // the rest uninitialized.
973
- for (size_t i = 0; i < get_size<IType>(rhs); ++i)
974
- reinterpret_cast<IType*>(lhs->ija)[i] = reinterpret_cast<IType*>(rhs->ija)[i]; // copy indices
975
-
1002
+ if (!rhs->offset[0] && !rhs->offset[1]) {
1003
+ for (size_t i = 0; i < get_size<IType>(rhs); ++i)
1004
+ reinterpret_cast<IType*>(lhs->ija)[i] = reinterpret_cast<IType*>(rhs->ija)[i]; // copy indices
1005
+ } else {
1006
+ rb_raise(rb_eNotImpError, "cannot copy struct due to different offsets");
1007
+ }
976
1008
  return lhs;
977
1009
  }
978
1010
 
@@ -1045,13 +1077,6 @@ static std::array<size_t,2> get_offsets(YALE_STORAGE* x) {
1045
1077
  }
1046
1078
 
1047
1079
 
1048
- static VALUE obj_at(YALE_STORAGE* s, size_t k) {
1049
- if (s->dtype == nm::RUBYOBJ) return reinterpret_cast<VALUE*>(s->a)[k];
1050
- else return rubyobj_from_cval(reinterpret_cast<void*>(reinterpret_cast<char*>(s->a) + k * DTYPE_SIZES[s->dtype]), s->dtype).rval;
1051
- }
1052
-
1053
-
1054
-
1055
1080
  template <typename IType>
1056
1081
  class IJAManager {
1057
1082
  protected:
@@ -1127,10 +1152,6 @@ public:
1127
1152
  }
1128
1153
 
1129
1154
  inline IType proper_j() const {
1130
- //if (!diag && k >= s->capacity) {
1131
- // std::cerr << "proper_j(): Warning: (nondiag) k exceeded capacity at row " << int(i) << ": k=" << int(k) << ", cap=" << s->capacity << std::endl;
1132
- // throw;
1133
- //}
1134
1155
  return diag ? i : ija[k];
1135
1156
  }
1136
1157
 
@@ -1193,7 +1214,6 @@ public:
1193
1214
  } else if (!row_has_diag()) { // row has no diagonal entries
1194
1215
  if (row_has_no_nd() || k_is_last_nd()) End = true; // row is totally empty, or we're at last entry
1195
1216
  else k++; // still entries to visit
1196
- // } else if (row_has_no_nd()) { // in this case we started at diag, so don't check it
1197
1217
  } else { // not at diag but it exists somewhere in the row, and row has at least one nd entry
1198
1218
  if (diag_is_ahead()) { // diag is ahead
1199
1219
  if (k_is_last_nd()) diag = true; // diag is next and last
@@ -1207,9 +1227,6 @@ public:
1207
1227
  }
1208
1228
  }
1209
1229
 
1210
- //if (k >= s->capacity)
1211
- // std::cerr << "operator++: Warning: k has exceeded capacity for row " << int(i) << "; k=" << int(k) << ", cap=" << s->capacity << std::endl;
1212
-
1213
1230
  return *this;
1214
1231
  }
1215
1232
 
@@ -1222,6 +1239,7 @@ public:
1222
1239
  };
1223
1240
 
1224
1241
 
1242
+
1225
1243
  template <typename IType>
1226
1244
  static VALUE map_stored(VALUE self) {
1227
1245
 
@@ -1236,7 +1254,15 @@ static VALUE map_stored(VALUE self) {
1236
1254
  RETURN_SIZED_ENUMERATOR(self, 0, 0, nm_yale_enumerator_length);
1237
1255
  VALUE init = rb_yield(default_value(s));
1238
1256
 
1239
- YALE_STORAGE* r = nm_yale_storage_create(nm::RUBYOBJ, shape, 2, s->capacity, NM_ITYPE(self));
1257
+ // Try to find a reasonable capacity to request when creating the matrix
1258
+ size_t ndnz = src_ndnz(s);
1259
+ if (s->src != s) // need to guess capacity
1260
+ ndnz = yale_count_slice_copy_ndnz(s, s->offset, s->shape);
1261
+ size_t request_capacity = s->shape[0] + ndnz + 1;
1262
+
1263
+ YALE_STORAGE* r = nm_yale_storage_create(nm::RUBYOBJ, shape, 2, request_capacity, NM_ITYPE(self));
1264
+ if (r->capacity < request_capacity)
1265
+ rb_raise(nm_eStorageTypeError, "conversion failed; capacity of %ld requested, max allowable is %ld", request_capacity, r->capacity);
1240
1266
  nm_yale_storage_init(r, &init);
1241
1267
 
1242
1268
  for (IType ri = 0; ri < shape[0]; ++ri) {
@@ -1314,7 +1340,16 @@ static VALUE map_merged_stored(VALUE left, VALUE right, VALUE init, nm::itype_t
1314
1340
  if (init == Qnil)
1315
1341
  init = rb_yield_values(2, s_init, t_init);
1316
1342
 
1317
- YALE_STORAGE* r = nm_yale_storage_create(nm::RUBYOBJ, shape, 2, NM_MAX(s->capacity, t->capacity), itype);
1343
+ // Make a reasonable approximation of the resulting capacity
1344
+ size_t s_ndnz = src_ndnz(s), t_ndnz = src_ndnz(t);
1345
+ if (s->src != s) s_ndnz = yale_count_slice_copy_ndnz(s, s->offset, s->shape);
1346
+ if (t->src != t) t_ndnz = yale_count_slice_copy_ndnz(t, t->offset, t->shape);
1347
+ size_t request_capacity = shape[0] + NM_MAX(s_ndnz, t_ndnz) + 1;
1348
+
1349
+ YALE_STORAGE* r = nm_yale_storage_create(nm::RUBYOBJ, shape, 2, request_capacity, itype);
1350
+ if (r->capacity < request_capacity)
1351
+ rb_raise(nm_eStorageTypeError, "conversion failed; capacity of %ld requested, max allowable is %ld", request_capacity, r->capacity);
1352
+
1318
1353
  nm_yale_storage_init(r, &init);
1319
1354
 
1320
1355
  IJAManager<IType> sm(s, itype),
@@ -1325,7 +1360,7 @@ static VALUE map_merged_stored(VALUE left, VALUE right, VALUE init, nm::itype_t
1325
1360
  RowIterator<IType> tit(t, tm.ija, ri + t_offsets[0], shape[1], t_offsets[1]);
1326
1361
 
1327
1362
  RowIterator<IType> rit(r, reinterpret_cast<IType*>(r->ija), ri, shape[1]);
1328
- while (!rit.end() && (!sit.end() || !tit.end())) {
1363
+ while (!sit.end() || !tit.end()) {
1329
1364
  VALUE rv;
1330
1365
  IType rj;
1331
1366
 
@@ -1360,179 +1395,99 @@ static VALUE map_merged_stored(VALUE left, VALUE right, VALUE init, nm::itype_t
1360
1395
  }
1361
1396
 
1362
1397
 
1363
- } // end of namespace nm::yale_storage
1364
-
1365
-
1366
- // Helper function used only for the RETURN_SIZED_ENUMERATOR macro. Returns the length of
1367
- // the matrix's storage.
1368
- static VALUE nm_yale_stored_enumerator_length(VALUE nmatrix) {
1369
- long len = nm_yale_storage_get_size(NM_STORAGE_YALE(nmatrix));
1370
- return LONG2NUM(len);
1371
- }
1372
-
1373
-
1374
-
1398
+ /*
1399
+ * This function and the two helper structs enable us to use partial template specialization.
1400
+ * See also: http://stackoverflow.com/questions/6623375/c-template-specialization-on-functions
1401
+ */
1375
1402
  template <typename DType, typename IType>
1376
- struct yale_iteration_helper {
1377
-
1378
- static VALUE iterate_with_indices(VALUE nm) {
1379
- YALE_STORAGE* s = NM_STORAGE_YALE(nm);
1380
- DType* a = reinterpret_cast<DType*>(s->a);
1381
- IType* ija = reinterpret_cast<IType*>(s->ija);
1382
-
1383
- // If we don't have a block, return an enumerator.
1384
- RETURN_SIZED_ENUMERATOR(nm, 0, 0, nm_yale_enumerator_length);
1403
+ static VALUE each_stored_with_indices(VALUE nm) {
1404
+ YALE_STORAGE* s = NM_STORAGE_YALE(nm);
1405
+ DType* a = reinterpret_cast<DType*>(s->a);
1406
+ IType* ija = reinterpret_cast<IType*>(s->ija);
1385
1407
 
1386
- // Iterate in two dimensions.
1387
- for (long i = 0; i < s->shape[0]; ++i) {
1388
- VALUE ii = LONG2NUM(i);
1408
+ // If we don't have a block, return an enumerator.
1409
+ RETURN_SIZED_ENUMERATOR(nm, 0, 0, nm_yale_stored_enumerator_length);
1389
1410
 
1390
- IType k = ija[i], k_next = ija[i+1];
1391
-
1392
- for (long j = 0; j < s->shape[1]; ++j) {
1393
- VALUE v, jj = LONG2NUM(j);
1394
-
1395
- // zero is stored in s->shape[0]
1396
- if (i == j) {
1397
- v = rubyobj_from_cval(&(a[i]), NM_DTYPE(nm)).rval;
1398
- } else {
1399
- // Walk through the row until we find the correct location.
1400
- while (ija[k] < j && k < k_next) ++k;
1401
- if (k < k_next && ija[k] == j) {
1402
- v = rubyobj_from_cval(&(a[k]), NM_DTYPE(nm)).rval;
1403
- ++k;
1404
- } else v = rubyobj_from_cval(&(a[s->shape[0]]), NM_DTYPE(nm)).rval;
1405
- }
1406
- rb_yield_values(3, v, ii, jj);
1407
- }
1408
- }
1411
+ // Iterate along diagonal
1412
+ for (size_t sk = NM_MAX(s->offset[0], s->offset[1]); sk < NM_MIN(s->shape[0] + s->offset[0], s->shape[1] + s->offset[1]); ++sk) {
1413
+ VALUE ii = LONG2NUM(sk - s->offset[0]),
1414
+ jj = LONG2NUM(sk - s->offset[1]);
1409
1415
 
1410
- return nm;
1416
+ rb_yield_values(3, obj_at(s, sk), ii, jj);
1411
1417
  }
1412
1418
 
1419
+ // Iterate through non-diagonal elements, row by row
1420
+ for (long ri = 0; ri < s->shape[0]; ++ri) {
1421
+ long si = ri + s->offset[0];
1422
+ IType p = ija[si],
1423
+ next_p = ija[si+1];
1413
1424
 
1414
- static VALUE iterate_stored_with_indices(VALUE nm) {
1415
-
1416
- YALE_STORAGE* s = NM_STORAGE_YALE(nm);
1417
- DType* a = reinterpret_cast<DType*>(s->a);
1418
- IType* ija = reinterpret_cast<IType*>(s->ija);
1419
-
1420
- // If we don't have a block, return an enumerator.
1421
- RETURN_SIZED_ENUMERATOR(nm, 0, 0, nm_yale_stored_enumerator_length);
1422
-
1423
- // Iterate along diagonal
1424
- for (size_t k = 0; k < s->shape[0]; ++k) {
1425
- VALUE ii = LONG2NUM(k),
1426
- jj = LONG2NUM(k);
1425
+ // if this is a reference to another matrix, we should find the left boundary of the slice
1426
+ if (s != s->src && p < next_p)
1427
+ p = binary_search_left_boundary<IType>(s, p, next_p-1, s->offset[1]);
1427
1428
 
1428
- VALUE v = rubyobj_from_cval(&(a[k]), NM_DTYPE(nm)).rval;
1429
- rb_yield_values(3, v, ii, jj );
1430
- }
1431
-
1432
- // Iterate through non-diagonal elements, row by row
1433
- for (long i = 0; i < s->shape[0]; ++i) {
1434
- long p = static_cast<long>( ija[i] ),
1435
- next_p = static_cast<long>( ija[i+1] );
1429
+ for (; p < next_p; ++p) {
1430
+ long sj = static_cast<long>(ija[p]),
1431
+ rj = sj - s->offset[1];
1432
+ if (rj < 0) continue;
1436
1433
 
1437
- for (; p < next_p; ++p) {
1438
- long j = static_cast<long>(ija[p]);
1439
- VALUE ii = LONG2NUM(i),
1440
- jj = LONG2NUM(j);
1434
+ if (rj >= s->shape[1]) break;
1441
1435
 
1442
- VALUE v = rubyobj_from_cval(&(a[p]), NM_DTYPE(nm)).rval;
1443
- rb_yield_values(3, v, ii, jj);
1444
- }
1436
+ rb_yield_values(3, obj_at(s, p), LONG2NUM(ri), LONG2NUM(rj));
1445
1437
  }
1446
-
1447
- return nm;
1448
1438
  }
1449
- };
1450
1439
 
1440
+ return nm;
1441
+ }
1451
1442
 
1452
- template <typename IType>
1453
- struct yale_iteration_helper<RubyObject, IType> {
1454
- static VALUE iterate_with_indices(VALUE nm) {
1455
- YALE_STORAGE* s = NM_STORAGE_YALE(nm);
1456
- RubyObject* a = reinterpret_cast<RubyObject*>(s->a);
1457
- IType* ija = reinterpret_cast<IType*>(s->ija);
1443
+ template <typename DType, typename IType>
1444
+ static VALUE each_with_indices(VALUE nm) {
1445
+ YALE_STORAGE* s = NM_STORAGE_YALE(nm);
1446
+ DType* a = reinterpret_cast<DType*>(s->a);
1447
+ IType* ija = reinterpret_cast<IType*>(s->ija);
1458
1448
 
1459
- // If we don't have a block, return an enumerator.
1460
- RETURN_SIZED_ENUMERATOR(nm, 0, 0, nm_yale_enumerator_length);
1449
+ // If we don't have a block, return an enumerator.
1450
+ RETURN_SIZED_ENUMERATOR(nm, 0, 0, nm_yale_enumerator_length);
1461
1451
 
1462
- // Iterate in two dimensions.
1463
- for (long i = 0; i < s->shape[0]; ++i) {
1464
- VALUE ii = LONG2NUM(i);
1452
+ // Iterate in two dimensions.
1453
+ // s stands for src, r stands for ref (for ri, rj, si, sj)
1454
+ for (long ri = 0; ri < s->shape[0]; ++ri) {
1455
+ long si = ri + s->offset[0];
1456
+ VALUE ii = LONG2NUM(ri + s->offset[0]);
1465
1457
 
1466
- IType k = ija[i], k_next = ija[i+1];
1458
+ IType k = ija[si], k_next = ija[si+1];
1467
1459
 
1468
- for (long j = 0; j < s->shape[1]; ++j) {
1469
- VALUE v, jj = LONG2NUM(j);
1460
+ for (long rj = 0; rj < s->shape[1]; ++rj) {
1461
+ long sj = rj + s->offset[1];
1462
+ VALUE v, jj = LONG2NUM(rj);
1470
1463
 
1471
- // zero is stored in s->shape[0]
1472
- if (i == j) {
1473
- v = a[i].rval;
1474
- } else {
1475
- // Walk through the row until we find the correct location.
1476
- while (ija[k] < j && k < k_next) ++k;
1477
- if (k < k_next && ija[k] == j) {
1478
- v = a[k].rval;
1479
- ++k;
1480
- } else v = a[s->shape[0]].rval;
1481
- }
1482
- rb_yield_values(3, v, ii, jj);
1464
+ // zero is stored in s->shape[0]
1465
+ if (si == sj) {
1466
+ v = obj_at(s, si);
1467
+ } else {
1468
+ // Walk through the row until we find the correct location.
1469
+ while (ija[k] < sj && k < k_next) ++k;
1470
+ if (k < k_next && ija[k] == sj) {
1471
+ v = obj_at(s, k);
1472
+ ++k;
1473
+ } else v = default_value(s); // rubyobj_from_cval(&(a[s->shape[0]]), NM_DTYPE(nm)).rval;
1483
1474
  }
1475
+ rb_yield_values(3, v, ii, jj);
1484
1476
  }
1485
-
1486
- return nm;
1487
1477
  }
1488
1478
 
1489
- static VALUE iterate_stored_with_indices(VALUE nm) {
1490
-
1491
- YALE_STORAGE* s = NM_STORAGE_YALE(nm);
1492
- RubyObject* a = reinterpret_cast<RubyObject*>(s->a);
1493
- IType* ija = reinterpret_cast<IType*>(s->ija);
1494
-
1495
- // If we don't have a block, return an enumerator.
1496
- RETURN_SIZED_ENUMERATOR(nm, 0, 0, nm_yale_stored_enumerator_length);
1497
-
1498
- // Iterate along diagonal
1499
- for (size_t k = 0; k < s->shape[0]; ++k) {
1500
- VALUE ii = LONG2NUM(k),
1501
- jj = LONG2NUM(k);
1502
- rb_yield_values(3, a[k].rval, ii, jj ); // yield element, i, j
1503
- }
1504
-
1505
- // Iterate through non-diagonal elements, row by row
1506
- for (long i = 0; i < s->shape[0]; ++i) {
1507
- IType p = ija[i],
1508
- next_p = ija[i+1];
1509
-
1510
- for (; p < next_p; ++p) {
1511
- long j = static_cast<long>(ija[p]);
1512
- VALUE ii = LONG2NUM(i),
1513
- jj = LONG2NUM(j);
1514
-
1515
- rb_yield_values(3, a[p].rval, ii, jj );
1516
- }
1517
- }
1479
+ return nm;
1480
+ }
1518
1481
 
1519
- return nm;
1520
- }
1521
- };
1522
1482
 
1483
+ } // end of namespace nm::yale_storage
1523
1484
 
1524
- /*
1525
- * This function and the two helper structs enable us to use partial template specialization.
1526
- * See also: http://stackoverflow.com/questions/6623375/c-template-specialization-on-functions
1527
- */
1528
- template <typename DType, typename IType>
1529
- static VALUE yale_each_stored_with_indices(VALUE nm) {
1530
- return yale_iteration_helper<DType, IType>::iterate_stored_with_indices(nm);
1531
- }
1532
1485
 
1533
- template <typename DType, typename IType>
1534
- static VALUE yale_each_with_indices(VALUE nm) {
1535
- return yale_iteration_helper<DType, IType>::iterate_with_indices(nm);
1486
+ // Helper function used only for the RETURN_SIZED_ENUMERATOR macro. Returns the length of
1487
+ // the matrix's storage.
1488
+ static VALUE nm_yale_stored_enumerator_length(VALUE nmatrix) {
1489
+ long len = nm_yale_storage_get_size(NM_STORAGE_YALE(nmatrix));
1490
+ return LONG2NUM(len);
1536
1491
  }
1537
1492
 
1538
1493
 
@@ -1577,7 +1532,7 @@ VALUE nm_yale_each_with_indices(VALUE nmatrix) {
1577
1532
  nm::dtype_t d = NM_DTYPE(nmatrix);
1578
1533
  nm::itype_t i = NM_ITYPE(nmatrix);
1579
1534
 
1580
- NAMED_LI_DTYPE_TEMPLATE_TABLE(ttable, nm::yale_each_with_indices, VALUE, VALUE)
1535
+ NAMED_LI_DTYPE_TEMPLATE_TABLE(ttable, nm::yale_storage::each_with_indices, VALUE, VALUE)
1581
1536
 
1582
1537
  return ttable[d][i](nmatrix);
1583
1538
  }
@@ -1588,7 +1543,7 @@ VALUE nm_yale_each_stored_with_indices(VALUE nmatrix) {
1588
1543
  nm::dtype_t d = NM_DTYPE(nmatrix);
1589
1544
  nm::itype_t i = NM_ITYPE(nmatrix);
1590
1545
 
1591
- NAMED_LI_DTYPE_TEMPLATE_TABLE(ttable, nm::yale_each_stored_with_indices, VALUE, VALUE)
1546
+ NAMED_LI_DTYPE_TEMPLATE_TABLE(ttable, nm::yale_storage::each_stored_with_indices, VALUE, VALUE)
1592
1547
 
1593
1548
  return ttable[d][i](nmatrix);
1594
1549
  }
@@ -1606,18 +1561,53 @@ char nm_yale_storage_set(STORAGE* storage, SLICE* slice, void* v) {
1606
1561
  return ttable[casted_storage->dtype][casted_storage->itype](casted_storage, slice, v);
1607
1562
  }
1608
1563
 
1564
+
1565
+ /*
1566
+ * Determine the number of non-diagonal non-zeros in a not-yet-created copy of a slice or matrix.
1567
+ */
1568
+ static size_t yale_count_slice_copy_ndnz(const YALE_STORAGE* s, size_t* offset, size_t* shape) {
1569
+ NAMED_LI_DTYPE_TEMPLATE_TABLE(ttable, nm::yale_storage::count_slice_copy_ndnz, size_t, const YALE_STORAGE*, size_t*, size_t*)
1570
+
1571
+ return ttable[s->dtype][s->itype](s, offset, shape);
1572
+ }
1573
+
1574
+
1609
1575
  /*
1610
- * C accessor for yale_storage::get, which returns a slice of YALE_STORAGE object by coppy
1576
+ * C accessor for yale_storage::get, which returns a slice of YALE_STORAGE object by copy
1611
1577
  *
1612
1578
  * Slicing-related.
1613
1579
  */
1614
1580
  void* nm_yale_storage_get(STORAGE* storage, SLICE* slice) {
1615
- NAMED_LI_DTYPE_TEMPLATE_TABLE(ttable, nm::yale_storage::get, void*, YALE_STORAGE* storage, SLICE* slice);
1616
- YALE_STORAGE* s = (YALE_STORAGE*)storage;
1581
+ YALE_STORAGE* casted_storage = (YALE_STORAGE*)storage;
1617
1582
 
1583
+ if (slice->single) {
1584
+ NAMED_LI_DTYPE_TEMPLATE_TABLE(elem_copy_table, nm::yale_storage::get_single, void*, YALE_STORAGE*, SLICE*)
1618
1585
 
1619
- YALE_STORAGE* casted_storage = (YALE_STORAGE*)storage;
1620
- return ttable[casted_storage->dtype][casted_storage->itype](casted_storage, slice);
1586
+ return elem_copy_table[casted_storage->dtype][casted_storage->itype](casted_storage, slice);
1587
+ } else {
1588
+ // Copy shape for yale construction
1589
+ size_t* shape = ALLOC_N(size_t, 2);
1590
+ shape[0] = slice->lengths[0];
1591
+ shape[1] = slice->lengths[1];
1592
+
1593
+ // only count ndnz if our slice is smaller, otherwise use the given value
1594
+ size_t ndnz = src_ndnz(casted_storage);
1595
+ if (shape[0] != casted_storage->shape[0] || shape[1] != casted_storage->shape[1])
1596
+ ndnz = yale_count_slice_copy_ndnz(casted_storage, slice->coords, shape); // expensive operation
1597
+
1598
+ size_t request_capacity = shape[0] + ndnz + 1; // capacity of new matrix
1599
+ YALE_STORAGE* ns = nm_yale_storage_create(casted_storage->dtype, shape, 2, request_capacity, casted_storage->itype);
1600
+
1601
+ // This check probably isn't necessary.
1602
+ if (ns->capacity < request_capacity)
1603
+ rb_raise(nm_eStorageTypeError, "conversion failed; capacity of %ld requested, max allowable is %ld", request_capacity, ns->capacity);
1604
+
1605
+ NAMED_LRI_DTYPE_TEMPLATE_TABLE(slice_copy_table, nm::yale_storage::slice_copy, void, YALE_STORAGE* ns, const YALE_STORAGE* s, size_t*, size_t*, nm::dtype_t)
1606
+
1607
+ slice_copy_table[ns->dtype][casted_storage->dtype][casted_storage->itype](ns, casted_storage, slice->coords, slice->lengths, casted_storage->dtype);
1608
+
1609
+ return ns;
1610
+ }
1621
1611
  }
1622
1612
 
1623
1613
  /*
@@ -1644,10 +1634,15 @@ static void nm_yale_storage_increment_ia_after(YALE_STORAGE* s, size_t ija_size,
1644
1634
  * for some set of coordinates.
1645
1635
  */
1646
1636
  void* nm_yale_storage_ref(STORAGE* storage, SLICE* slice) {
1647
- NAMED_LI_DTYPE_TEMPLATE_TABLE(ttable, nm::yale_storage::ref, void*, YALE_STORAGE* storage, SLICE* slice);
1648
-
1649
1637
  YALE_STORAGE* casted_storage = (YALE_STORAGE*)storage;
1650
- return ttable[casted_storage->dtype][casted_storage->itype](casted_storage, slice);
1638
+
1639
+ if (slice->single) {
1640
+ NAMED_LI_DTYPE_TEMPLATE_TABLE(elem_copy_table, nm::yale_storage::get_single, void*, YALE_STORAGE*, SLICE*)
1641
+ return elem_copy_table[casted_storage->dtype][casted_storage->itype](casted_storage, slice);
1642
+ } else {
1643
+ NAMED_LI_DTYPE_TEMPLATE_TABLE(ref_table, nm::yale_storage::ref, void*, YALE_STORAGE* storage, SLICE* slice)
1644
+ return ref_table[casted_storage->dtype][casted_storage->itype](casted_storage, slice);
1645
+ }
1651
1646
  }
1652
1647
 
1653
1648
 
@@ -1685,11 +1680,20 @@ size_t nm_yale_storage_get_size(const YALE_STORAGE* storage) {
1685
1680
  }
1686
1681
 
1687
1682
 
1683
+
1688
1684
  /*
1689
1685
  * Return a void pointer to the matrix's default value entry.
1690
1686
  */
1691
1687
  static void* default_value_ptr(const YALE_STORAGE* s) {
1692
- return reinterpret_cast<void*>(reinterpret_cast<char*>(s->a) + (s->shape[0] * DTYPE_SIZES[s->dtype]));
1688
+ return reinterpret_cast<void*>(reinterpret_cast<char*>(s->a) + (s->src->shape[0] * DTYPE_SIZES[s->dtype]));
1689
+ }
1690
+
1691
+ /*
1692
+ * Return the Ruby object at a given location in storage.
1693
+ */
1694
+ static VALUE obj_at(YALE_STORAGE* s, size_t k) {
1695
+ if (s->dtype == nm::RUBYOBJ) return reinterpret_cast<VALUE*>(s->a)[k];
1696
+ else return rubyobj_from_cval(reinterpret_cast<void*>(reinterpret_cast<char*>(s->a) + k * DTYPE_SIZES[s->dtype]), s->dtype).rval;
1693
1697
  }
1694
1698
 
1695
1699
 
@@ -1816,11 +1820,26 @@ YALE_STORAGE* nm_yale_storage_create(nm::dtype_t dtype, size_t* shape, size_t di
1816
1820
  void nm_yale_storage_delete(STORAGE* s) {
1817
1821
  if (s) {
1818
1822
  YALE_STORAGE* storage = (YALE_STORAGE*)s;
1823
+ if (storage->count-- == 1) {
1824
+ xfree(storage->shape);
1825
+ xfree(storage->offset);
1826
+ xfree(storage->ija);
1827
+ xfree(storage->a);
1828
+ xfree(storage);
1829
+ }
1830
+ }
1831
+ }
1832
+
1833
+ /*
1834
+ * Destructor for the yale storage ref
1835
+ */
1836
+ void nm_yale_storage_delete_ref(STORAGE* s) {
1837
+ if (s) {
1838
+ YALE_STORAGE* storage = (YALE_STORAGE*)s;
1839
+ nm_yale_storage_delete( reinterpret_cast<STORAGE*>(storage->src) );
1819
1840
  xfree(storage->shape);
1820
1841
  xfree(storage->offset);
1821
- xfree(storage->ija);
1822
- xfree(storage->a);
1823
- xfree(storage);
1842
+ xfree(s);
1824
1843
  }
1825
1844
  }
1826
1845
 
@@ -1850,6 +1869,7 @@ void nm_yale_storage_mark(void* storage_base) {
1850
1869
  }
1851
1870
  }
1852
1871
 
1872
+
1853
1873
  /*
1854
1874
  * Allocates and initializes the basic struct (but not the IJA or A vectors).
1855
1875
  */
@@ -1867,6 +1887,7 @@ static YALE_STORAGE* alloc(nm::dtype_t dtype, size_t* shape, size_t dim, nm::ity
1867
1887
  s->dim = dim;
1868
1888
  s->itype = nm_yale_storage_itype_by_shape(shape);
1869
1889
  s->src = reinterpret_cast<STORAGE*>(s);
1890
+ s->count = 1;
1870
1891
 
1871
1892
  // See if a higher itype has been requested.
1872
1893
  if (static_cast<int8_t>(s->itype) < static_cast<int8_t>(min_itype))
@@ -1935,7 +1956,7 @@ static VALUE nm_a(int argc, VALUE* argv, VALUE self) {
1935
1956
  }
1936
1957
  VALUE ary = rb_ary_new4(size, vals);
1937
1958
 
1938
- for (size_t i = size; i < s->capacity; ++i)
1959
+ for (size_t i = size; i < reinterpret_cast<YALE_STORAGE*>(s->src)->capacity; ++i)
1939
1960
  rb_ary_push(ary, Qnil);
1940
1961
 
1941
1962
  return ary;
@@ -2008,7 +2029,7 @@ static VALUE nm_lu(VALUE self) {
2008
2029
 
2009
2030
  VALUE ary = rb_ary_new4(size - s->shape[0] - 1, vals);
2010
2031
 
2011
- for (size_t i = size; i < s->capacity; ++i)
2032
+ for (size_t i = size; i < reinterpret_cast<YALE_STORAGE*>(s->src)->capacity; ++i)
2012
2033
  rb_ary_push(ary, Qnil);
2013
2034
 
2014
2035
  return ary;
@@ -2053,7 +2074,7 @@ static VALUE nm_ja(VALUE self) {
2053
2074
 
2054
2075
  VALUE ary = rb_ary_new4(size - s->shape[0] - 1, vals);
2055
2076
 
2056
- for (size_t i = size; i < s->capacity; ++i)
2077
+ for (size_t i = size; i < reinterpret_cast<YALE_STORAGE*>(s->src)->capacity; ++i)
2057
2078
  rb_ary_push(ary, Qnil);
2058
2079
 
2059
2080
  return ary;
@@ -2083,7 +2104,7 @@ static VALUE nm_ija(int argc, VALUE* argv, VALUE self) {
2083
2104
 
2084
2105
  VALUE ary = rb_ary_new4(size, vals);
2085
2106
 
2086
- for (size_t i = size; i < s->capacity; ++i)
2107
+ for (size_t i = size; i < reinterpret_cast<YALE_STORAGE*>(s->src)->capacity; ++i)
2087
2108
  rb_ary_push(ary, Qnil);
2088
2109
 
2089
2110
  return ary;
@@ -2127,9 +2148,7 @@ static VALUE nm_nd_row(int argc, VALUE* argv, VALUE self) {
2127
2148
  size_t nextpos = FIX2INT(rubyobj_from_cval_by_itype((char*)(s->ija) + ITYPE_SIZES[itype]*(i+1), itype).rval);
2128
2149
  size_t diff = nextpos - pos;
2129
2150
 
2130
- //std::cerr << "diff = " << diff << "\tpos = " << pos << "\tnextpos = " << nextpos << std::endl;
2131
-
2132
- VALUE ret; // HERE
2151
+ VALUE ret;
2133
2152
  if (keys) {
2134
2153
  ret = rb_ary_new3(diff);
2135
2154
 
@@ -2212,7 +2231,7 @@ VALUE nm_vector_set(int argc, VALUE* argv, VALUE self) { //, VALUE i_, VALUE jv,
2212
2231
 
2213
2232
  char ins_type = nm_yale_storage_vector_insert(s, pos, j, vals, len, false, dtype, itype);
2214
2233
  nm_yale_storage_increment_ia_after(s, s->shape[0], i, len, itype);
2215
- s->ndnz += len;
2234
+ reinterpret_cast<YALE_STORAGE*>(s->src)->ndnz += len;
2216
2235
 
2217
2236
  // Return the updated position
2218
2237
  pos += len;