nmatrix 0.0.6 → 0.0.7

Sign up to get free protection for your applications and to get access to all the features.
Files changed (67) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -0
  3. data/Gemfile +5 -0
  4. data/History.txt +97 -0
  5. data/Manifest.txt +34 -7
  6. data/README.rdoc +13 -13
  7. data/Rakefile +36 -26
  8. data/ext/nmatrix/data/data.cpp +15 -2
  9. data/ext/nmatrix/data/data.h +4 -0
  10. data/ext/nmatrix/data/ruby_object.h +5 -14
  11. data/ext/nmatrix/extconf.rb +3 -2
  12. data/ext/nmatrix/{util/math.cpp → math.cpp} +296 -6
  13. data/ext/nmatrix/math/asum.h +143 -0
  14. data/ext/nmatrix/math/geev.h +82 -0
  15. data/ext/nmatrix/math/gemm.h +267 -0
  16. data/ext/nmatrix/math/gemv.h +208 -0
  17. data/ext/nmatrix/math/ger.h +96 -0
  18. data/ext/nmatrix/math/gesdd.h +80 -0
  19. data/ext/nmatrix/math/gesvd.h +78 -0
  20. data/ext/nmatrix/math/getf2.h +86 -0
  21. data/ext/nmatrix/math/getrf.h +240 -0
  22. data/ext/nmatrix/math/getri.h +107 -0
  23. data/ext/nmatrix/math/getrs.h +125 -0
  24. data/ext/nmatrix/math/idamax.h +86 -0
  25. data/ext/nmatrix/{util → math}/lapack.h +60 -356
  26. data/ext/nmatrix/math/laswp.h +165 -0
  27. data/ext/nmatrix/math/long_dtype.h +52 -0
  28. data/ext/nmatrix/math/math.h +1154 -0
  29. data/ext/nmatrix/math/nrm2.h +181 -0
  30. data/ext/nmatrix/math/potrs.h +125 -0
  31. data/ext/nmatrix/math/rot.h +141 -0
  32. data/ext/nmatrix/math/rotg.h +115 -0
  33. data/ext/nmatrix/math/scal.h +73 -0
  34. data/ext/nmatrix/math/swap.h +73 -0
  35. data/ext/nmatrix/math/trsm.h +383 -0
  36. data/ext/nmatrix/nmatrix.cpp +176 -152
  37. data/ext/nmatrix/nmatrix.h +1 -2
  38. data/ext/nmatrix/ruby_constants.cpp +9 -4
  39. data/ext/nmatrix/ruby_constants.h +1 -0
  40. data/ext/nmatrix/storage/dense.cpp +57 -41
  41. data/ext/nmatrix/storage/list.cpp +52 -50
  42. data/ext/nmatrix/storage/storage.cpp +59 -43
  43. data/ext/nmatrix/storage/yale.cpp +352 -333
  44. data/ext/nmatrix/storage/yale.h +4 -0
  45. data/lib/nmatrix.rb +2 -2
  46. data/lib/nmatrix/blas.rb +4 -4
  47. data/lib/nmatrix/enumerate.rb +241 -0
  48. data/lib/nmatrix/lapack.rb +54 -1
  49. data/lib/nmatrix/math.rb +462 -0
  50. data/lib/nmatrix/nmatrix.rb +210 -486
  51. data/lib/nmatrix/nvector.rb +0 -62
  52. data/lib/nmatrix/rspec.rb +75 -0
  53. data/lib/nmatrix/shortcuts.rb +136 -108
  54. data/lib/nmatrix/version.rb +1 -1
  55. data/spec/blas_spec.rb +20 -12
  56. data/spec/elementwise_spec.rb +22 -13
  57. data/spec/io_spec.rb +1 -0
  58. data/spec/lapack_spec.rb +197 -0
  59. data/spec/nmatrix_spec.rb +39 -38
  60. data/spec/nvector_spec.rb +3 -9
  61. data/spec/rspec_monkeys.rb +29 -0
  62. data/spec/rspec_spec.rb +34 -0
  63. data/spec/shortcuts_spec.rb +14 -16
  64. data/spec/slice_spec.rb +242 -186
  65. data/spec/spec_helper.rb +19 -0
  66. metadata +33 -5
  67. data/ext/nmatrix/util/math.h +0 -2612
@@ -139,7 +139,8 @@ DENSE_STORAGE* create_from_yale_storage(const YALE_STORAGE* rhs, dtype_t l_dtype
139
139
 
140
140
  // Allocate and set shape.
141
141
  size_t* shape = ALLOC_N(size_t, rhs->dim);
142
- memcpy(shape, rhs->shape, rhs->dim * sizeof(size_t));
142
+ shape[0] = rhs->shape[0];
143
+ shape[1] = rhs->shape[1];
143
144
 
144
145
  DENSE_STORAGE* lhs = nm_dense_storage_create(l_dtype, shape, rhs->dim, NULL, 0);
145
146
  LDType* lhs_elements = reinterpret_cast<LDType*>(lhs->elements);
@@ -147,45 +148,49 @@ DENSE_STORAGE* create_from_yale_storage(const YALE_STORAGE* rhs, dtype_t l_dtype
147
148
  // Position in dense to write to.
148
149
  size_t pos = 0;
149
150
 
150
- LDType LCAST_ZERO = rhs_a[rhs->shape[0]];
151
+ LDType LCAST_ZERO = rhs_a[rhs->src->shape[0]];
151
152
 
152
153
  // Walk through rows. For each entry we set in dense, increment pos.
153
- for (RIType i = 0; i < rhs->shape[0]; ++i) {
154
-
155
- // Position in yale array
156
- RIType ija = rhs_ija[i];
154
+ for (size_t i = 0; i < shape[0]; ++i) {
155
+ RIType ri = i + rhs->offset[0];
157
156
 
158
- if (ija == rhs_ija[i+1]) { // Check boundaries of row: is row empty?
157
+ if (rhs_ija[ri] == rhs_ija[ri+1]) { // Check boundaries of row: is row empty? (Yes.)
159
158
 
160
159
  // Write zeros in each column.
161
- for (RIType j = 0; j < rhs->shape[1]; ++j) { // Move to next dense position.
160
+ for (size_t j = 0; j < shape[1]; ++j) { // Move to next dense position.
162
161
 
163
- // Fill in zeros (except for diagonal)
164
- if (i == j) lhs_elements[pos] = static_cast<LDType>(rhs_a[i]);
165
- else lhs_elements[pos] = LCAST_ZERO;
162
+ // Fill in zeros and copy the diagonal entry for this empty row.
163
+ if (ri == j + rhs->offset[1]) lhs_elements[pos] = static_cast<LDType>(rhs_a[ri]);
164
+ else lhs_elements[pos] = LCAST_ZERO;
166
165
 
167
166
  ++pos;
168
167
  }
169
168
 
170
- } else {
171
- // Row contains entries: write those in each column, interspersed with zeros.
172
- RIType jj = rhs_ija[ija];
169
+ } else { // Row contains entries: write those in each column, interspersed with zeros.
170
+
171
+ // Get the first ija position of the row (as sliced)
172
+ RIType ija = nm::yale_storage::binary_search_left_boundary<RIType>(rhs, rhs_ija[ri], rhs_ija[ri+1]-1, rhs->offset[1]);
173
+
174
+ // What column is it?
175
+ RIType next_stored_rj = rhs_ija[ija];
173
176
 
174
- for (size_t j = 0; j < rhs->shape[1]; ++j) {
175
- if (i == j) {
176
- lhs_elements[pos] = static_cast<LDType>(rhs_a[i]);
177
+ for (size_t j = 0; j < shape[1]; ++j) {
178
+ RIType rj = j + rhs->offset[1];
177
179
 
178
- } else if (j == jj) {
180
+ if (rj == ri) { // at a diagonal in RHS
181
+ lhs_elements[pos] = static_cast<LDType>(rhs_a[ri]);
182
+
183
+ } else if (rj == next_stored_rj) { // column ID was found in RHS
179
184
  lhs_elements[pos] = static_cast<LDType>(rhs_a[ija]); // Copy from rhs.
180
185
 
181
186
  // Get next.
182
187
  ++ija;
183
188
 
184
189
  // Increment to next column ID (or go off the end).
185
- if (ija < rhs_ija[i+1]) jj = rhs_ija[ija];
186
- else jj = rhs->shape[1];
190
+ if (ija < rhs_ija[ri+1]) next_stored_rj = rhs_ija[ija];
191
+ else next_stored_rj = rhs->src->shape[1];
187
192
 
188
- } else { // j < jj
193
+ } else { // rj < next_stored_rj
189
194
 
190
195
  // Insert zero.
191
196
  lhs_elements[pos] = LCAST_ZERO;
@@ -319,7 +324,7 @@ LIST_STORAGE* create_from_yale_storage(const YALE_STORAGE* rhs, dtype_t l_dtype)
319
324
  shape[0] = rhs->shape[0]; shape[1] = rhs->shape[1];
320
325
 
321
326
  RDType* rhs_a = reinterpret_cast<RDType*>(rhs->a);
322
- RDType R_ZERO = rhs_a[ rhs->shape[0] ];
327
+ RDType R_ZERO = rhs_a[ rhs->src->shape[0] ];
323
328
 
324
329
  // copy default value from the zero location in the Yale matrix
325
330
  LDType* default_val = ALLOC_N(LDType, 1);
@@ -333,35 +338,43 @@ LIST_STORAGE* create_from_yale_storage(const YALE_STORAGE* rhs, dtype_t l_dtype)
333
338
 
334
339
  NODE *last_row_added = NULL;
335
340
  // Walk through rows and columns as if RHS were a dense matrix
336
- for (RIType i = 0; i < rhs->shape[0]; ++i) {
341
+ for (RIType i = 0; i < shape[0]; ++i) {
342
+ RIType ri = i + rhs->offset[0];
343
+
337
344
  NODE *last_added = NULL;
338
345
 
339
346
  // Get boundaries of beginning and end of row
340
- RIType ija = rhs_ija[i],
341
- ija_next = rhs_ija[i+1];
347
+ RIType ija = rhs_ija[ri],
348
+ ija_next = rhs_ija[ri+1];
342
349
 
343
350
  // Are we going to need to add a diagonal for this row?
344
351
  bool add_diag = false;
345
- if (rhs_a[i] != R_ZERO) add_diag = true;
352
+ if (rhs_a[ri] != R_ZERO) add_diag = true; // non-zero and located within the bounds of the slice
346
353
 
347
354
  if (ija < ija_next || add_diag) {
355
+ ija = nm::yale_storage::binary_search_left_boundary<RIType>(rhs, ija, ija_next-1, rhs->offset[1]);
348
356
 
349
357
  LIST* curr_row = list::create();
350
358
 
351
359
  LDType* insert_val;
352
360
 
353
361
  while (ija < ija_next) {
354
- RIType jj = rhs_ija[ija]; // what column number is this?
362
+ // Find first column in slice
363
+ RIType rj = rhs_ija[ija];
364
+ RIType j = rj - rhs->offset[1];
355
365
 
356
366
  // Is there a nonzero diagonal item between the previously added item and the current one?
357
- if (jj > i && add_diag) {
367
+ if (rj > ri && add_diag) {
358
368
  // Allocate and copy insertion value
359
369
  insert_val = ALLOC_N(LDType, 1);
360
- *insert_val = static_cast<LDType>(rhs_a[i]);
370
+ *insert_val = static_cast<LDType>(rhs_a[ri]);
361
371
 
362
- // insert the item in the list at the appropriate location
363
- if (last_added) last_added = list::insert_after(last_added, i, insert_val);
364
- else last_added = list::insert(curr_row, false, i, insert_val);
372
+ // Insert the item in the list at the appropriate location.
373
+ // What is the appropriate key? Well, it's definitely right(i)==right(j), but the
374
+ // rj index has already been advanced past ri. So we should treat ri as the column and
375
+ // subtract offset[1].
376
+ if (last_added) last_added = list::insert_after(last_added, ri - rhs->offset[1], insert_val);
377
+ else last_added = list::insert(curr_row, false, ri - rhs->offset[1], insert_val);
365
378
 
366
379
  // don't add again!
367
380
  add_diag = false;
@@ -371,20 +384,23 @@ LIST_STORAGE* create_from_yale_storage(const YALE_STORAGE* rhs, dtype_t l_dtype)
371
384
  insert_val = ALLOC_N(LDType, 1);
372
385
  *insert_val = static_cast<LDType>(rhs_a[ija]);
373
386
 
374
- if (last_added) last_added = list::insert_after(last_added, jj, insert_val);
375
- else last_added = list::insert(curr_row, false, jj, insert_val);
387
+ if (last_added) last_added = list::insert_after(last_added, j, insert_val);
388
+ else last_added = list::insert(curr_row, false, j, insert_val);
376
389
 
377
390
  ++ija; // move to next entry in Yale matrix
378
391
  }
379
392
 
380
393
  if (add_diag) {
394
+
381
395
  // still haven't added the diagonal.
382
396
  insert_val = ALLOC_N(LDType, 1);
383
- *insert_val = static_cast<LDType>(rhs_a[i]);
397
+ *insert_val = static_cast<LDType>(rhs_a[ri]);
384
398
 
385
399
  // insert the item in the list at the appropriate location
386
- if (last_added) last_added = list::insert_after(last_added, i, insert_val);
387
- else last_added = list::insert(curr_row, false, i, insert_val);
400
+ if (last_added) last_added = list::insert_after(last_added, ri - rhs->offset[1], insert_val);
401
+ else last_added = list::insert(curr_row, false, ri - rhs->offset[1], insert_val);
402
+
403
+ // no need to set add_diag to false because it'll be reset automatically in next iteration.
388
404
  }
389
405
 
390
406
  // Now add the list at the appropriate location
@@ -458,6 +474,7 @@ namespace yale_storage { // FIXME: Move to yale.cpp
458
474
  */
459
475
  template <typename LDType, typename RDType, typename LIType>
460
476
  YALE_STORAGE* create_from_dense_storage(const DENSE_STORAGE* rhs, dtype_t l_dtype, void* init) {
477
+
461
478
  if (rhs->dim != 2) rb_raise(nm_eStorageTypeError, "can only convert matrices of dim 2 to yale");
462
479
 
463
480
  LIType pos = 0;
@@ -504,16 +521,15 @@ namespace yale_storage { // FIXME: Move to yale.cpp
504
521
 
505
522
  // Start just after the zero position.
506
523
  LIType ija = shape[0]+1;
507
- LIType i;
508
524
  pos = 0;
509
525
 
510
526
  // Copy contents
511
- for (i = 0; i < rhs->shape[0]; ++i) {
527
+ for (LIType i = 0; i < rhs->shape[0]; ++i) {
512
528
  // indicate the beginning of a row in the IJA array
513
- lhs_ija[i]= ija;
529
+ lhs_ija[i] = ija;
514
530
 
515
531
  for (LIType j = 0; j < rhs->shape[1]; ++j) {
516
- pos = rhs->stride[0]*(i + rhs->offset[0]) + rhs->stride[1]*(j + rhs->offset[1]); // calc position with offsets
532
+ pos = rhs->stride[0] * (i + rhs->offset[0]) + rhs->stride[1] * (j + rhs->offset[1]); // calc position with offsets
517
533
 
518
534
  if (i == j) { // copy to diagonal
519
535
  lhs_a[i] = static_cast<LDType>(rhs_elements[pos]);
@@ -570,13 +586,13 @@ namespace yale_storage { // FIXME: Move to yale.cpp
570
586
  // Copy contents
571
587
  for (NODE* i_curr = rhs->rows->first; i_curr; i_curr = i_curr->next) {
572
588
 
573
- // Shrink refernce
589
+ // Shrink reference
574
590
  int i = i_curr->key - rhs->offset[0];
575
591
  if (i < 0 || i >= (int)rhs->shape[0]) continue;
576
592
 
577
593
  for (NODE* j_curr = ((LIST*)(i_curr->val))->first; j_curr; j_curr = j_curr->next) {
578
594
 
579
- // Shrink refernce
595
+ // Shrink reference
580
596
  int j = j_curr->key - rhs->offset[1];
581
597
  if (j < 0 || j >= (int)rhs->shape[1]) continue;
582
598
 
@@ -44,6 +44,7 @@
44
44
  #include <cstdio> // std::fprintf
45
45
  #include <iostream>
46
46
  #include <array>
47
+ #include <typeinfo>
47
48
 
48
49
  #define RB_P(OBJ) \
49
50
  rb_funcall(rb_stderr, rb_intern("print"), 1, rb_funcall(OBJ, rb_intern("object_id"), 0)); \
@@ -54,9 +55,8 @@
54
55
  */
55
56
 
56
57
  // #include "types.h"
57
- #include "util/math.h"
58
-
59
58
  #include "data/data.h"
59
+ #include "math/math.h"
60
60
 
61
61
  #include "common.h"
62
62
  #include "yale.h"
@@ -86,8 +86,11 @@ extern "C" {
86
86
  static YALE_STORAGE* nm_copy_alloc_struct(const YALE_STORAGE* rhs, const nm::dtype_t new_dtype, const size_t new_capacity, const size_t new_size);
87
87
  static YALE_STORAGE* alloc(nm::dtype_t dtype, size_t* shape, size_t dim, nm::itype_t min_itype);
88
88
 
89
+ static size_t yale_count_slice_copy_ndnz(const YALE_STORAGE* s, size_t*, size_t*);
90
+
89
91
  static void* default_value_ptr(const YALE_STORAGE* s);
90
92
  static VALUE default_value(const YALE_STORAGE* s);
93
+ static VALUE obj_at(YALE_STORAGE* s, size_t k);
91
94
 
92
95
  /* Ruby-accessible functions */
93
96
  static VALUE nm_size(VALUE self);
@@ -100,6 +103,9 @@ extern "C" {
100
103
 
101
104
  static VALUE nm_nd_row(int argc, VALUE* argv, VALUE self);
102
105
 
106
+ static inline size_t src_ndnz(const YALE_STORAGE* s) {
107
+ return reinterpret_cast<YALE_STORAGE*>(s->src)->ndnz;
108
+ }
103
109
 
104
110
  } // end extern "C" block
105
111
 
@@ -241,72 +247,6 @@ YALE_STORAGE* create_from_old_yale(dtype_t dtype, size_t* shape, void* r_ia, voi
241
247
  }
242
248
 
243
249
 
244
- /*
245
- * Take two Yale storages and merge them into a new Yale storage.
246
- *
247
- * Uses the left as a template for the creation of a new one.
248
- */
249
- template <typename DType, typename IType>
250
- YALE_STORAGE* create_merged__(const YALE_STORAGE* left, const YALE_STORAGE* right) {
251
- char ins_type;
252
-
253
- size_t size = get_size<IType>(left);
254
-
255
- // s represents the resulting storage
256
- YALE_STORAGE* s = copy_alloc_struct<IType>(left, left->dtype, NM_MAX(left->capacity, right->capacity), size);
257
-
258
- IType* sija = reinterpret_cast<IType*>(s->ija);
259
- IType* rija = reinterpret_cast<IType*>(right->ija);
260
-
261
- // set the element between D and LU (the boundary in A), which should be 0.
262
- reinterpret_cast<DType*>(s->a)[s->shape[0]] = reinterpret_cast<DType*>(left->a)[left->shape[0]];
263
-
264
- if (right && right != left) {
265
- // some operations are unary and don't need this; others are x+x and don't need this
266
-
267
- for (IType i = 0; i < s->shape[0]; ++i) {
268
-
269
- IType ija = sija[i];
270
- IType ija_next = sija[i+1];
271
-
272
- for (IType r_ija = rija[i]; r_ija < rija[i+1]; ++r_ija) {
273
-
274
- size_t ja = sija[ija]; // insert expects a size_t
275
-
276
- if (ija == ija_next) {
277
- // destination row is empty
278
- ins_type = vector_insert<DType,IType>(s, ija, &ja, NULL, 1, true);
279
- increment_ia_after<IType>(s, s->shape[0], i, 1);
280
- ++(s->ndnz);
281
- ++ija;
282
-
283
- if (ins_type == 'i') ++ija_next;
284
-
285
- } else {
286
- bool found;
287
-
288
- // merge positions into destination row
289
- IType pos = insert_search<IType>(s, ija, ija_next-1, sija[ija], &found);
290
-
291
- if (!found) {
292
- vector_insert<DType,IType>(s, pos, &ja, NULL, 1, true);
293
- increment_ia_after<IType>(s, s->shape[0], i, 1);
294
- ++(s->ndnz);
295
-
296
- if (ins_type == 'i') ++ija_next;
297
- }
298
-
299
- // can now set a left boundary for the next search
300
- ija = pos + 1;
301
- }
302
- }
303
- }
304
- }
305
-
306
- return s;
307
- }
308
-
309
-
310
250
  /*
311
251
  * Empty the matrix by initializing the IJA vector and setting the diagonal to 0.
312
252
  *
@@ -332,29 +272,27 @@ size_t max_size(YALE_STORAGE* s) {
332
272
 
333
273
  return result;
334
274
  }
275
+
276
+
335
277
  ///////////////
336
278
  // Accessors //
337
279
  ///////////////
338
280
 
281
+
339
282
  /*
340
- * Returns a slice of YALE_STORAGE object by copy
283
+ * Determine the number of non-diagonal non-zeros in a not-yet-created copy of a slice or matrix.
341
284
  */
342
- template <typename DType,typename IType>
343
- void* get(YALE_STORAGE* storage, SLICE* slice) {
344
-
345
- size_t *offset = slice->coords;
346
- // Copy shape for yale construction
347
- size_t* shape = ALLOC_N(size_t, 2);
348
- shape[0] = slice->lengths[0];
349
- shape[1] = slice->lengths[1];
285
+ template <typename DType, typename IType>
286
+ static size_t count_slice_copy_ndnz(const YALE_STORAGE* s, size_t* offset, size_t* shape) {
287
+ IType* ija = reinterpret_cast<IType*>(s->ija);
288
+ DType* a = reinterpret_cast<DType*>(s->a);
350
289
 
351
- IType* src_ija = reinterpret_cast<IType*>(storage->ija);
352
- DType* src_a = reinterpret_cast<DType*>(storage->a);
290
+ DType ZERO(*reinterpret_cast<DType*>(default_value_ptr(s)));
353
291
 
354
292
  // Calc ndnz for the destination
355
293
  size_t ndnz = 0;
356
- size_t i,j; // indexes of destination matrix
357
- size_t k,l; // indexes of source matrix
294
+ size_t i, j; // indexes of destination matrix
295
+ size_t k, l; // indexes of source matrix
358
296
  for (i = 0; i < shape[0]; i++) {
359
297
  k = i + offset[0];
360
298
  for (j = 0; j < shape[1]; j++) {
@@ -363,42 +301,54 @@ void* get(YALE_STORAGE* storage, SLICE* slice) {
363
301
  if (j == i) continue;
364
302
 
365
303
  if (k == l) { // for diagonal element of source
366
- if (src_a[k] != 0) ++ndnz;
304
+ if (a[k] != ZERO) ++ndnz;
367
305
  } else { // for non-diagonal element
368
- for (size_t c = src_ija[k]; c < src_ija[k+1]; c++) {
369
- if (src_ija[c] == l) {
306
+ for (size_t c = ija[k]; c < ija[k+1]; c++) {
307
+ if (ija[c] == l) {
370
308
  ++ndnz;
371
309
  break;
372
310
  }
373
311
  }
374
312
  }
375
-
376
313
  }
377
314
  }
378
315
 
379
- size_t request_capacity = shape[0] + ndnz + 1;
380
- //fprintf(stderr, "yale get copy: shape0=%d, shape1=%d, ndnz=%d, request_capacity=%d\n", shape[0], shape[1], ndnz, request_capacity);
381
- YALE_STORAGE* ns = nm_yale_storage_create(storage->dtype, shape, 2, request_capacity, storage->itype);
316
+ return ndnz;
317
+ }
318
+
319
+
320
+
321
+ /*
322
+ * Copy some portion of a matrix into a new matrix.
323
+ */
324
+ template <typename LDType, typename RDType, typename IType>
325
+ static void slice_copy(YALE_STORAGE* ns, const YALE_STORAGE* s, size_t* offset, size_t* lengths, dtype_t new_dtype) {
326
+
327
+ IType* src_ija = reinterpret_cast<IType*>(s->ija);
328
+ RDType* src_a = reinterpret_cast<RDType*>(s->a);
382
329
 
383
- if (ns->capacity < request_capacity)
384
- rb_raise(nm_eStorageTypeError, "conversion failed; capacity of %ld requested, max allowable is %ld", request_capacity, ns->capacity);
330
+ RDType RZERO(*reinterpret_cast<RDType*>(default_value_ptr(s)));
385
331
 
386
332
  // Initialize the A and IJA arrays
387
- init<DType,IType>(ns, default_value_ptr(storage));
388
- IType* dst_ija = reinterpret_cast<IType*>(ns->ija);
389
- DType* dst_a = reinterpret_cast<DType*>(ns->a);
390
-
391
- size_t ija = shape[0] + 1;
392
- DType val = src_a[storage->shape[0]]; // use 0 as the default for copy
393
- for (i = 0; i < shape[0]; ++i) {
333
+ LDType val(RZERO); // need default value for init. Can't use ns default value because it's not initialized yet
334
+ init<LDType,IType>(ns, &val);
335
+ IType* dst_ija = reinterpret_cast<IType*>(ns->ija);
336
+ LDType* dst_a = reinterpret_cast<LDType*>(ns->a);
337
+
338
+ size_t ija = lengths[0] + 1;
339
+
340
+ size_t i, j; // indexes of destination matrix
341
+ size_t k, l; // indexes of source matrix
342
+
343
+ for (i = 0; i < lengths[0]; ++i) {
394
344
  k = i + offset[0];
395
- for (j = 0; j < shape[1]; ++j) {
345
+ for (j = 0; j < lengths[1]; ++j) {
396
346
  bool found = false;
397
347
  l = j + offset[1];
398
-
348
+
399
349
  // Get value from source matrix
400
350
  if (k == l) { // source diagonal
401
- if (src_a[k] != 0) { // don't bother copying non-zero values from the diagonal
351
+ if (src_a[k] != RZERO) { // don't bother copying zero (default) values from the diagonal
402
352
  val = src_a[k];
403
353
  found = true;
404
354
  }
@@ -420,9 +370,8 @@ void* get(YALE_STORAGE* storage, SLICE* slice) {
420
370
  // copy non-diagonal element
421
371
  dst_ija[ija] = j;
422
372
  dst_a[ija] = val;
423
-
424
373
  ++ija;
425
- for (size_t c = i + 1; c <= shape[0]; ++c) {
374
+ for (size_t c = i + 1; c <= lengths[0]; ++c) {
426
375
  dst_ija[c] = ija;
427
376
  }
428
377
  }
@@ -430,39 +379,71 @@ void* get(YALE_STORAGE* storage, SLICE* slice) {
430
379
  }
431
380
  }
432
381
 
433
- dst_ija[shape[0]] = ija; // indicate the end of the last row
434
- ns->ndnz = ndnz;
435
- return ns;
382
+ dst_ija[lengths[0]] = ija; // indicate the end of the last row
383
+ ns->ndnz = ija - lengths[0] - 1; // update ndnz count
384
+ }
385
+
386
+
387
+ /*
388
+ * Get a single element of a yale storage object
389
+ */
390
+ template <typename DType, typename IType>
391
+ static void* get_single(YALE_STORAGE* storage, SLICE* slice) {
392
+
393
+ DType* a = reinterpret_cast<DType*>(storage->a);
394
+ IType* ija = reinterpret_cast<IType*>(storage->ija);
395
+
396
+ size_t coord0 = storage->offset[0] + slice->coords[0];
397
+ size_t coord1 = storage->offset[1] + slice->coords[1];
398
+
399
+ if (coord0 == coord1)
400
+ return &(a[ coord0 ]); // return diagonal entry
401
+
402
+ if (ija[coord0] == ija[coord0+1])
403
+ return &(a[ storage->src->shape[0] ]); // return zero pointer
404
+
405
+ // binary search for the column's location
406
+ int pos = binary_search<IType>(storage, ija[coord0], ija[coord0+1]-1, coord1);
407
+
408
+ if (pos != -1 && ija[pos] == coord1)
409
+ return &(a[pos]); // found exact value
410
+
411
+ return &(a[ storage->src->shape[0] ]); // return a pointer that happens to be zero
436
412
  }
413
+
414
+
437
415
  /*
438
416
  * Returns a pointer to the correct location in the A vector of a YALE_STORAGE object, given some set of coordinates
439
417
  * (the coordinates are stored in slice).
440
418
  */
441
419
  template <typename DType,typename IType>
442
- void* ref(YALE_STORAGE* storage, SLICE* slice) {
443
- size_t* coords = slice->coords;
420
+ void* ref(YALE_STORAGE* s, SLICE* slice) {
444
421
 
445
- if (!slice->single) rb_raise(rb_eNotImpError, "This type slicing not supported yet.");
422
+ YALE_STORAGE* ns = ALLOC( YALE_STORAGE );
446
423
 
447
- DType* a = reinterpret_cast<DType*>(storage->a);
448
- IType* ija = reinterpret_cast<IType*>(storage->ija);
424
+ ns->dim = s->dim;
425
+ ns->offset = ALLOC_N(size_t, ns->dim);
426
+ ns->shape = ALLOC_N(size_t, ns->dim);
449
427
 
450
- if (coords[0] == coords[1])
451
- return &(a[ coords[0] ]); // return diagonal entry
428
+ for (size_t i = 0; i < ns->dim; ++i) {
429
+ ns->offset[i] = slice->coords[i] + s->offset[i];
430
+ ns->shape[i] = slice->lengths[i];
431
+ }
452
432
 
453
- if (ija[coords[0]] == ija[coords[0]+1])
454
- return &(a[ storage->shape[0] ]); // return zero pointer
433
+ ns->dtype = s->dtype;
434
+ ns->itype = s->itype; // or should we go by shape?
455
435
 
456
- // binary search for the column's location
457
- int pos = binary_search<IType>(storage,
458
- ija[coords[0]],
459
- ija[coords[0]+1]-1,
460
- coords[1]);
436
+ ns->a = s->a;
437
+ ns->ija = s->ija;
461
438
 
462
- if (pos != -1 && ija[pos] == coords[1])
463
- return &(a[pos]); // found exact value
439
+ ns->src = s->src;
440
+ s->src->count++;
441
+
442
+ ns->ndnz = 0;
443
+ ns->capacity= 0;
444
+
445
+ return ns;
464
446
 
465
- return &(a[ storage->shape[0] ]); // return a pointer that happens to be zero
466
447
  }
467
448
 
468
449
  /*
@@ -472,22 +453,23 @@ void* ref(YALE_STORAGE* storage, SLICE* slice) {
472
453
  template <typename DType, typename IType>
473
454
  char set(YALE_STORAGE* storage, SLICE* slice, void* value) {
474
455
  DType* v = reinterpret_cast<DType*>(value);
475
- size_t* coords = slice->coords;
456
+ size_t coord0 = storage->offset[0] + slice->coords[0],
457
+ coord1 = storage->offset[1] + slice->coords[1];
476
458
 
477
459
  bool found = false;
478
460
  char ins_type;
479
461
 
480
- if (coords[0] == coords[1]) {
481
- reinterpret_cast<DType*>(storage->a)[coords[0]] = *v; // set diagonal
462
+ if (coord0 == coord1) {
463
+ reinterpret_cast<DType*>(storage->a)[coord0] = *v; // set diagonal
482
464
  return 'r';
483
465
  }
484
466
 
485
467
  // Get IJA positions of the beginning and end of the row
486
- if (reinterpret_cast<IType*>(storage->ija)[coords[0]] == reinterpret_cast<IType*>(storage->ija)[coords[0]+1]) {
468
+ if (reinterpret_cast<IType*>(storage->ija)[coord0] == reinterpret_cast<IType*>(storage->ija)[coord0+1]) {
487
469
  // empty row
488
- ins_type = vector_insert<DType,IType>(storage, reinterpret_cast<IType*>(storage->ija)[coords[0]], &(coords[1]), v, 1, false);
489
- increment_ia_after<IType>(storage, storage->shape[0], coords[0], 1);
490
- storage->ndnz++;
470
+ ins_type = vector_insert<DType,IType>(storage, reinterpret_cast<IType*>(storage->ija)[coord0], &(coord1), v, 1, false);
471
+ increment_ia_after<IType>(storage, storage->shape[0], coord0, 1);
472
+ reinterpret_cast<YALE_STORAGE*>(storage->src)->ndnz++;
491
473
 
492
474
  return ins_type;
493
475
  }
@@ -498,19 +480,19 @@ char set(YALE_STORAGE* storage, SLICE* slice, void* value) {
498
480
 
499
481
  // Do a binary search for the column
500
482
  size_t pos = insert_search<IType>(storage,
501
- reinterpret_cast<IType*>(storage->ija)[coords[0]],
502
- reinterpret_cast<IType*>(storage->ija)[coords[0]+1]-1,
503
- coords[1], &found);
483
+ reinterpret_cast<IType*>(storage->ija)[coord0],
484
+ reinterpret_cast<IType*>(storage->ija)[coord0+1]-1,
485
+ coord1, &found);
504
486
 
505
487
  if (found) { // replace
506
- reinterpret_cast<IType*>(storage->ija)[pos] = coords[1];
488
+ reinterpret_cast<IType*>(storage->ija)[pos] = coord1;
507
489
  reinterpret_cast<DType*>(storage->a)[pos] = *v;
508
490
  return 'r';
509
491
  }
510
492
 
511
- ins_type = vector_insert<DType,IType>(storage, pos, &(coords[1]), v, 1, false);
512
- increment_ia_after<IType>(storage, storage->shape[0], coords[0], 1);
513
- storage->ndnz++;
493
+ ins_type = vector_insert<DType,IType>(storage, pos, &(coord1), v, 1, false);
494
+ increment_ia_after<IType>(storage, storage->shape[0], coord0, 1);
495
+ reinterpret_cast<YALE_STORAGE*>(storage->src)->ndnz++;
514
496
 
515
497
  return ins_type;
516
498
  }
@@ -682,6 +664,31 @@ static bool ndrow_is_empty(const YALE_STORAGE* s, IType ija, const IType ija_nex
682
664
  // Utility //
683
665
  /////////////
684
666
 
667
+
668
+ /*
669
+ * Binary search for finding the beginning of a slice. Returns the position of the first element which is greater
670
+ * than or equal to bound.
671
+ */
672
+ template <typename IType>
673
+ IType binary_search_left_boundary(const YALE_STORAGE* s, IType left, IType right, IType bound) {
674
+ if (left > right) return -1;
675
+
676
+ IType* ija = reinterpret_cast<IType*>(s->ija);
677
+
678
+ if (ija[left] >= bound) return left; // shortcut
679
+
680
+ IType mid = (left + right) / 2;
681
+ IType mid_j = ija[mid];
682
+
683
+ if (mid_j == bound)
684
+ return mid;
685
+ else if (mid_j > bound) { // eligible! don't exclude it.
686
+ return binary_search_left_boundary<IType>(s, left, mid, bound);
687
+ } else // (mid_j < bound)
688
+ return binary_search_left_boundary<IType>(s, mid + 1, right, bound);
689
+ }
690
+
691
+
685
692
  /*
686
693
  * Binary search for returning stored values. Returns a non-negative position, or -1 for not found.
687
694
  */
@@ -710,6 +717,8 @@ int binary_search(YALE_STORAGE* s, IType left, IType right, IType key) {
710
717
  * Resize yale storage vectors A and IJA, copying values.
711
718
  */
712
719
  static void vector_grow(YALE_STORAGE* s) {
720
+ if (s->src != s) throw; // need to correct this quickly.
721
+
713
722
  size_t new_capacity = s->capacity * GROWTH_CONSTANT;
714
723
  size_t max_capacity = max_size(s);
715
724
 
@@ -742,6 +751,8 @@ static void vector_grow(YALE_STORAGE* s) {
742
751
  */
743
752
  template <typename DType, typename IType>
744
753
  static char vector_insert_resize(YALE_STORAGE* s, size_t current_size, size_t pos, size_t* j, size_t n, bool struct_only) {
754
+ if (s != s->src) throw;
755
+
745
756
  // Determine the new capacity for the IJA and A vectors.
746
757
  size_t new_capacity = s->capacity * GROWTH_CONSTANT;
747
758
  size_t max_capacity = max_size(s);
@@ -916,15 +927,30 @@ static IType insert_search(YALE_STORAGE* s, IType left, IType right, IType key,
916
927
  template <typename LDType, typename RDType, typename IType>
917
928
  YALE_STORAGE* cast_copy(const YALE_STORAGE* rhs, dtype_t new_dtype) {
918
929
 
919
- // Allocate a new structure
920
- size_t size = get_size<IType>(rhs);
921
- YALE_STORAGE* lhs = copy_alloc_struct<IType>(rhs, new_dtype, rhs->capacity, size);
930
+ YALE_STORAGE* lhs;
922
931
 
923
- if (rhs->dtype == new_dtype) { // FIXME: Test if this condition is actually faster; second condition should work just as well.
932
+ if (rhs->src != rhs) { // copy the reference
933
+ // Copy shape for yale construction
934
+ size_t* shape = ALLOC_N(size_t, 2);
935
+ shape[0] = rhs->shape[0];
936
+ shape[1] = rhs->shape[1];
937
+ size_t ndnz = src_ndnz(rhs);
938
+ if (shape[0] != rhs->src->shape[0] || shape[1] != rhs->src->shape[1])
939
+ ndnz = count_slice_copy_ndnz<RDType,IType>(rhs, rhs->offset, rhs->shape); // expensive, avoid if possible
940
+ size_t request_capacity = shape[0] + ndnz + 1;
941
+ // FIXME: Should we use a different itype? Or same?
942
+ lhs = nm_yale_storage_create(new_dtype, shape, 2, request_capacity, rhs->itype);
924
943
 
925
- memcpy(lhs->a, rhs->a, size * DTYPE_SIZES[new_dtype]);
944
+ // This check probably isn't necessary.
945
+ if (lhs->capacity < request_capacity)
946
+ rb_raise(nm_eStorageTypeError, "conversion failed; capacity of %ld requested, max allowable is %ld", request_capacity, lhs->capacity);
926
947
 
927
- } else {
948
+ slice_copy<LDType, RDType, IType>(lhs, rhs, rhs->offset, rhs->shape, new_dtype);
949
+ } else { // regular copy
950
+
951
+ // Allocate a new structure
952
+ size_t size = get_size<IType>(rhs);
953
+ lhs = copy_alloc_struct<IType>(rhs, new_dtype, rhs->capacity, size);
928
954
 
929
955
  LDType* la = reinterpret_cast<LDType*>(lhs->a);
930
956
  RDType* ra = reinterpret_cast<RDType*>(rhs->a);
@@ -932,7 +958,6 @@ YALE_STORAGE* cast_copy(const YALE_STORAGE* rhs, dtype_t new_dtype) {
932
958
  for (size_t index = 0; index < size; ++index) {
933
959
  la[index] = ra[index];
934
960
  }
935
-
936
961
  }
937
962
 
938
963
  return lhs;
@@ -958,7 +983,10 @@ static YALE_STORAGE* copy_alloc_struct(const YALE_STORAGE* rhs, const dtype_t ne
958
983
  lhs->shape = ALLOC_N( size_t, lhs->dim );
959
984
  lhs->offset = ALLOC_N( size_t, lhs->dim );
960
985
  memcpy(lhs->shape, rhs->shape, lhs->dim * sizeof(size_t));
961
- memcpy(lhs->shape, rhs->shape, lhs->dim * sizeof(size_t));
986
+ //memcpy(lhs->offset, rhs->offset, lhs->dim * sizeof(size_t));
987
+ lhs->offset[0] = 0;
988
+ lhs->offset[1] = 0;
989
+
962
990
  lhs->itype = rhs->itype;
963
991
  lhs->capacity = new_capacity;
964
992
  lhs->dtype = new_dtype;
@@ -967,12 +995,16 @@ static YALE_STORAGE* copy_alloc_struct(const YALE_STORAGE* rhs, const dtype_t ne
967
995
  lhs->ija = ALLOC_N( IType, lhs->capacity );
968
996
  lhs->a = ALLOC_N( char, DTYPE_SIZES[new_dtype] * lhs->capacity );
969
997
  lhs->src = lhs;
998
+ lhs->count = 1;
970
999
 
971
1000
  // Now copy the contents -- but only within the boundaries set by the size. Leave
972
1001
  // the rest uninitialized.
973
- for (size_t i = 0; i < get_size<IType>(rhs); ++i)
974
- reinterpret_cast<IType*>(lhs->ija)[i] = reinterpret_cast<IType*>(rhs->ija)[i]; // copy indices
975
-
1002
+ if (!rhs->offset[0] && !rhs->offset[1]) {
1003
+ for (size_t i = 0; i < get_size<IType>(rhs); ++i)
1004
+ reinterpret_cast<IType*>(lhs->ija)[i] = reinterpret_cast<IType*>(rhs->ija)[i]; // copy indices
1005
+ } else {
1006
+ rb_raise(rb_eNotImpError, "cannot copy struct due to different offsets");
1007
+ }
976
1008
  return lhs;
977
1009
  }
978
1010
 
@@ -1045,13 +1077,6 @@ static std::array<size_t,2> get_offsets(YALE_STORAGE* x) {
1045
1077
  }
1046
1078
 
1047
1079
 
1048
- static VALUE obj_at(YALE_STORAGE* s, size_t k) {
1049
- if (s->dtype == nm::RUBYOBJ) return reinterpret_cast<VALUE*>(s->a)[k];
1050
- else return rubyobj_from_cval(reinterpret_cast<void*>(reinterpret_cast<char*>(s->a) + k * DTYPE_SIZES[s->dtype]), s->dtype).rval;
1051
- }
1052
-
1053
-
1054
-
1055
1080
  template <typename IType>
1056
1081
  class IJAManager {
1057
1082
  protected:
@@ -1127,10 +1152,6 @@ public:
1127
1152
  }
1128
1153
 
1129
1154
  inline IType proper_j() const {
1130
- //if (!diag && k >= s->capacity) {
1131
- // std::cerr << "proper_j(): Warning: (nondiag) k exceeded capacity at row " << int(i) << ": k=" << int(k) << ", cap=" << s->capacity << std::endl;
1132
- // throw;
1133
- //}
1134
1155
  return diag ? i : ija[k];
1135
1156
  }
1136
1157
 
@@ -1193,7 +1214,6 @@ public:
1193
1214
  } else if (!row_has_diag()) { // row has no diagonal entries
1194
1215
  if (row_has_no_nd() || k_is_last_nd()) End = true; // row is totally empty, or we're at last entry
1195
1216
  else k++; // still entries to visit
1196
- // } else if (row_has_no_nd()) { // in this case we started at diag, so don't check it
1197
1217
  } else { // not at diag but it exists somewhere in the row, and row has at least one nd entry
1198
1218
  if (diag_is_ahead()) { // diag is ahead
1199
1219
  if (k_is_last_nd()) diag = true; // diag is next and last
@@ -1207,9 +1227,6 @@ public:
1207
1227
  }
1208
1228
  }
1209
1229
 
1210
- //if (k >= s->capacity)
1211
- // std::cerr << "operator++: Warning: k has exceeded capacity for row " << int(i) << "; k=" << int(k) << ", cap=" << s->capacity << std::endl;
1212
-
1213
1230
  return *this;
1214
1231
  }
1215
1232
 
@@ -1222,6 +1239,7 @@ public:
1222
1239
  };
1223
1240
 
1224
1241
 
1242
+
1225
1243
  template <typename IType>
1226
1244
  static VALUE map_stored(VALUE self) {
1227
1245
 
@@ -1236,7 +1254,15 @@ static VALUE map_stored(VALUE self) {
1236
1254
  RETURN_SIZED_ENUMERATOR(self, 0, 0, nm_yale_enumerator_length);
1237
1255
  VALUE init = rb_yield(default_value(s));
1238
1256
 
1239
- YALE_STORAGE* r = nm_yale_storage_create(nm::RUBYOBJ, shape, 2, s->capacity, NM_ITYPE(self));
1257
+ // Try to find a reasonable capacity to request when creating the matrix
1258
+ size_t ndnz = src_ndnz(s);
1259
+ if (s->src != s) // need to guess capacity
1260
+ ndnz = yale_count_slice_copy_ndnz(s, s->offset, s->shape);
1261
+ size_t request_capacity = s->shape[0] + ndnz + 1;
1262
+
1263
+ YALE_STORAGE* r = nm_yale_storage_create(nm::RUBYOBJ, shape, 2, request_capacity, NM_ITYPE(self));
1264
+ if (r->capacity < request_capacity)
1265
+ rb_raise(nm_eStorageTypeError, "conversion failed; capacity of %ld requested, max allowable is %ld", request_capacity, r->capacity);
1240
1266
  nm_yale_storage_init(r, &init);
1241
1267
 
1242
1268
  for (IType ri = 0; ri < shape[0]; ++ri) {
@@ -1314,7 +1340,16 @@ static VALUE map_merged_stored(VALUE left, VALUE right, VALUE init, nm::itype_t
1314
1340
  if (init == Qnil)
1315
1341
  init = rb_yield_values(2, s_init, t_init);
1316
1342
 
1317
- YALE_STORAGE* r = nm_yale_storage_create(nm::RUBYOBJ, shape, 2, NM_MAX(s->capacity, t->capacity), itype);
1343
+ // Make a reasonable approximation of the resulting capacity
1344
+ size_t s_ndnz = src_ndnz(s), t_ndnz = src_ndnz(t);
1345
+ if (s->src != s) s_ndnz = yale_count_slice_copy_ndnz(s, s->offset, s->shape);
1346
+ if (t->src != t) t_ndnz = yale_count_slice_copy_ndnz(t, t->offset, t->shape);
1347
+ size_t request_capacity = shape[0] + NM_MAX(s_ndnz, t_ndnz) + 1;
1348
+
1349
+ YALE_STORAGE* r = nm_yale_storage_create(nm::RUBYOBJ, shape, 2, request_capacity, itype);
1350
+ if (r->capacity < request_capacity)
1351
+ rb_raise(nm_eStorageTypeError, "conversion failed; capacity of %ld requested, max allowable is %ld", request_capacity, r->capacity);
1352
+
1318
1353
  nm_yale_storage_init(r, &init);
1319
1354
 
1320
1355
  IJAManager<IType> sm(s, itype),
@@ -1325,7 +1360,7 @@ static VALUE map_merged_stored(VALUE left, VALUE right, VALUE init, nm::itype_t
1325
1360
  RowIterator<IType> tit(t, tm.ija, ri + t_offsets[0], shape[1], t_offsets[1]);
1326
1361
 
1327
1362
  RowIterator<IType> rit(r, reinterpret_cast<IType*>(r->ija), ri, shape[1]);
1328
- while (!rit.end() && (!sit.end() || !tit.end())) {
1363
+ while (!sit.end() || !tit.end()) {
1329
1364
  VALUE rv;
1330
1365
  IType rj;
1331
1366
 
@@ -1360,179 +1395,99 @@ static VALUE map_merged_stored(VALUE left, VALUE right, VALUE init, nm::itype_t
1360
1395
  }
1361
1396
 
1362
1397
 
1363
- } // end of namespace nm::yale_storage
1364
-
1365
-
1366
- // Helper function used only for the RETURN_SIZED_ENUMERATOR macro. Returns the length of
1367
- // the matrix's storage.
1368
- static VALUE nm_yale_stored_enumerator_length(VALUE nmatrix) {
1369
- long len = nm_yale_storage_get_size(NM_STORAGE_YALE(nmatrix));
1370
- return LONG2NUM(len);
1371
- }
1372
-
1373
-
1374
-
1398
+ /*
1399
+ * This function and the two helper structs enable us to use partial template specialization.
1400
+ * See also: http://stackoverflow.com/questions/6623375/c-template-specialization-on-functions
1401
+ */
1375
1402
  template <typename DType, typename IType>
1376
- struct yale_iteration_helper {
1377
-
1378
- static VALUE iterate_with_indices(VALUE nm) {
1379
- YALE_STORAGE* s = NM_STORAGE_YALE(nm);
1380
- DType* a = reinterpret_cast<DType*>(s->a);
1381
- IType* ija = reinterpret_cast<IType*>(s->ija);
1382
-
1383
- // If we don't have a block, return an enumerator.
1384
- RETURN_SIZED_ENUMERATOR(nm, 0, 0, nm_yale_enumerator_length);
1403
+ static VALUE each_stored_with_indices(VALUE nm) {
1404
+ YALE_STORAGE* s = NM_STORAGE_YALE(nm);
1405
+ DType* a = reinterpret_cast<DType*>(s->a);
1406
+ IType* ija = reinterpret_cast<IType*>(s->ija);
1385
1407
 
1386
- // Iterate in two dimensions.
1387
- for (long i = 0; i < s->shape[0]; ++i) {
1388
- VALUE ii = LONG2NUM(i);
1408
+ // If we don't have a block, return an enumerator.
1409
+ RETURN_SIZED_ENUMERATOR(nm, 0, 0, nm_yale_stored_enumerator_length);
1389
1410
 
1390
- IType k = ija[i], k_next = ija[i+1];
1391
-
1392
- for (long j = 0; j < s->shape[1]; ++j) {
1393
- VALUE v, jj = LONG2NUM(j);
1394
-
1395
- // zero is stored in s->shape[0]
1396
- if (i == j) {
1397
- v = rubyobj_from_cval(&(a[i]), NM_DTYPE(nm)).rval;
1398
- } else {
1399
- // Walk through the row until we find the correct location.
1400
- while (ija[k] < j && k < k_next) ++k;
1401
- if (k < k_next && ija[k] == j) {
1402
- v = rubyobj_from_cval(&(a[k]), NM_DTYPE(nm)).rval;
1403
- ++k;
1404
- } else v = rubyobj_from_cval(&(a[s->shape[0]]), NM_DTYPE(nm)).rval;
1405
- }
1406
- rb_yield_values(3, v, ii, jj);
1407
- }
1408
- }
1411
+ // Iterate along diagonal
1412
+ for (size_t sk = NM_MAX(s->offset[0], s->offset[1]); sk < NM_MIN(s->shape[0] + s->offset[0], s->shape[1] + s->offset[1]); ++sk) {
1413
+ VALUE ii = LONG2NUM(sk - s->offset[0]),
1414
+ jj = LONG2NUM(sk - s->offset[1]);
1409
1415
 
1410
- return nm;
1416
+ rb_yield_values(3, obj_at(s, sk), ii, jj);
1411
1417
  }
1412
1418
 
1419
+ // Iterate through non-diagonal elements, row by row
1420
+ for (long ri = 0; ri < s->shape[0]; ++ri) {
1421
+ long si = ri + s->offset[0];
1422
+ IType p = ija[si],
1423
+ next_p = ija[si+1];
1413
1424
 
1414
- static VALUE iterate_stored_with_indices(VALUE nm) {
1415
-
1416
- YALE_STORAGE* s = NM_STORAGE_YALE(nm);
1417
- DType* a = reinterpret_cast<DType*>(s->a);
1418
- IType* ija = reinterpret_cast<IType*>(s->ija);
1419
-
1420
- // If we don't have a block, return an enumerator.
1421
- RETURN_SIZED_ENUMERATOR(nm, 0, 0, nm_yale_stored_enumerator_length);
1422
-
1423
- // Iterate along diagonal
1424
- for (size_t k = 0; k < s->shape[0]; ++k) {
1425
- VALUE ii = LONG2NUM(k),
1426
- jj = LONG2NUM(k);
1425
+ // if this is a reference to another matrix, we should find the left boundary of the slice
1426
+ if (s != s->src && p < next_p)
1427
+ p = binary_search_left_boundary<IType>(s, p, next_p-1, s->offset[1]);
1427
1428
 
1428
- VALUE v = rubyobj_from_cval(&(a[k]), NM_DTYPE(nm)).rval;
1429
- rb_yield_values(3, v, ii, jj );
1430
- }
1431
-
1432
- // Iterate through non-diagonal elements, row by row
1433
- for (long i = 0; i < s->shape[0]; ++i) {
1434
- long p = static_cast<long>( ija[i] ),
1435
- next_p = static_cast<long>( ija[i+1] );
1429
+ for (; p < next_p; ++p) {
1430
+ long sj = static_cast<long>(ija[p]),
1431
+ rj = sj - s->offset[1];
1432
+ if (rj < 0) continue;
1436
1433
 
1437
- for (; p < next_p; ++p) {
1438
- long j = static_cast<long>(ija[p]);
1439
- VALUE ii = LONG2NUM(i),
1440
- jj = LONG2NUM(j);
1434
+ if (rj >= s->shape[1]) break;
1441
1435
 
1442
- VALUE v = rubyobj_from_cval(&(a[p]), NM_DTYPE(nm)).rval;
1443
- rb_yield_values(3, v, ii, jj);
1444
- }
1436
+ rb_yield_values(3, obj_at(s, p), LONG2NUM(ri), LONG2NUM(rj));
1445
1437
  }
1446
-
1447
- return nm;
1448
1438
  }
1449
- };
1450
1439
 
1440
+ return nm;
1441
+ }
1451
1442
 
1452
- template <typename IType>
1453
- struct yale_iteration_helper<RubyObject, IType> {
1454
- static VALUE iterate_with_indices(VALUE nm) {
1455
- YALE_STORAGE* s = NM_STORAGE_YALE(nm);
1456
- RubyObject* a = reinterpret_cast<RubyObject*>(s->a);
1457
- IType* ija = reinterpret_cast<IType*>(s->ija);
1443
+ template <typename DType, typename IType>
1444
+ static VALUE each_with_indices(VALUE nm) {
1445
+ YALE_STORAGE* s = NM_STORAGE_YALE(nm);
1446
+ DType* a = reinterpret_cast<DType*>(s->a);
1447
+ IType* ija = reinterpret_cast<IType*>(s->ija);
1458
1448
 
1459
- // If we don't have a block, return an enumerator.
1460
- RETURN_SIZED_ENUMERATOR(nm, 0, 0, nm_yale_enumerator_length);
1449
+ // If we don't have a block, return an enumerator.
1450
+ RETURN_SIZED_ENUMERATOR(nm, 0, 0, nm_yale_enumerator_length);
1461
1451
 
1462
- // Iterate in two dimensions.
1463
- for (long i = 0; i < s->shape[0]; ++i) {
1464
- VALUE ii = LONG2NUM(i);
1452
+ // Iterate in two dimensions.
1453
+ // s stands for src, r stands for ref (for ri, rj, si, sj)
1454
+ for (long ri = 0; ri < s->shape[0]; ++ri) {
1455
+ long si = ri + s->offset[0];
1456
+ VALUE ii = LONG2NUM(ri + s->offset[0]);
1465
1457
 
1466
- IType k = ija[i], k_next = ija[i+1];
1458
+ IType k = ija[si], k_next = ija[si+1];
1467
1459
 
1468
- for (long j = 0; j < s->shape[1]; ++j) {
1469
- VALUE v, jj = LONG2NUM(j);
1460
+ for (long rj = 0; rj < s->shape[1]; ++rj) {
1461
+ long sj = rj + s->offset[1];
1462
+ VALUE v, jj = LONG2NUM(rj);
1470
1463
 
1471
- // zero is stored in s->shape[0]
1472
- if (i == j) {
1473
- v = a[i].rval;
1474
- } else {
1475
- // Walk through the row until we find the correct location.
1476
- while (ija[k] < j && k < k_next) ++k;
1477
- if (k < k_next && ija[k] == j) {
1478
- v = a[k].rval;
1479
- ++k;
1480
- } else v = a[s->shape[0]].rval;
1481
- }
1482
- rb_yield_values(3, v, ii, jj);
1464
+ // zero is stored in s->shape[0]
1465
+ if (si == sj) {
1466
+ v = obj_at(s, si);
1467
+ } else {
1468
+ // Walk through the row until we find the correct location.
1469
+ while (ija[k] < sj && k < k_next) ++k;
1470
+ if (k < k_next && ija[k] == sj) {
1471
+ v = obj_at(s, k);
1472
+ ++k;
1473
+ } else v = default_value(s); // rubyobj_from_cval(&(a[s->shape[0]]), NM_DTYPE(nm)).rval;
1483
1474
  }
1475
+ rb_yield_values(3, v, ii, jj);
1484
1476
  }
1485
-
1486
- return nm;
1487
1477
  }
1488
1478
 
1489
- static VALUE iterate_stored_with_indices(VALUE nm) {
1490
-
1491
- YALE_STORAGE* s = NM_STORAGE_YALE(nm);
1492
- RubyObject* a = reinterpret_cast<RubyObject*>(s->a);
1493
- IType* ija = reinterpret_cast<IType*>(s->ija);
1494
-
1495
- // If we don't have a block, return an enumerator.
1496
- RETURN_SIZED_ENUMERATOR(nm, 0, 0, nm_yale_stored_enumerator_length);
1497
-
1498
- // Iterate along diagonal
1499
- for (size_t k = 0; k < s->shape[0]; ++k) {
1500
- VALUE ii = LONG2NUM(k),
1501
- jj = LONG2NUM(k);
1502
- rb_yield_values(3, a[k].rval, ii, jj ); // yield element, i, j
1503
- }
1504
-
1505
- // Iterate through non-diagonal elements, row by row
1506
- for (long i = 0; i < s->shape[0]; ++i) {
1507
- IType p = ija[i],
1508
- next_p = ija[i+1];
1509
-
1510
- for (; p < next_p; ++p) {
1511
- long j = static_cast<long>(ija[p]);
1512
- VALUE ii = LONG2NUM(i),
1513
- jj = LONG2NUM(j);
1514
-
1515
- rb_yield_values(3, a[p].rval, ii, jj );
1516
- }
1517
- }
1479
+ return nm;
1480
+ }
1518
1481
 
1519
- return nm;
1520
- }
1521
- };
1522
1482
 
1483
+ } // end of namespace nm::yale_storage
1523
1484
 
1524
- /*
1525
- * This function and the two helper structs enable us to use partial template specialization.
1526
- * See also: http://stackoverflow.com/questions/6623375/c-template-specialization-on-functions
1527
- */
1528
- template <typename DType, typename IType>
1529
- static VALUE yale_each_stored_with_indices(VALUE nm) {
1530
- return yale_iteration_helper<DType, IType>::iterate_stored_with_indices(nm);
1531
- }
1532
1485
 
1533
- template <typename DType, typename IType>
1534
- static VALUE yale_each_with_indices(VALUE nm) {
1535
- return yale_iteration_helper<DType, IType>::iterate_with_indices(nm);
1486
+ // Helper function used only for the RETURN_SIZED_ENUMERATOR macro. Returns the length of
1487
+ // the matrix's storage.
1488
+ static VALUE nm_yale_stored_enumerator_length(VALUE nmatrix) {
1489
+ long len = nm_yale_storage_get_size(NM_STORAGE_YALE(nmatrix));
1490
+ return LONG2NUM(len);
1536
1491
  }
1537
1492
 
1538
1493
 
@@ -1577,7 +1532,7 @@ VALUE nm_yale_each_with_indices(VALUE nmatrix) {
1577
1532
  nm::dtype_t d = NM_DTYPE(nmatrix);
1578
1533
  nm::itype_t i = NM_ITYPE(nmatrix);
1579
1534
 
1580
- NAMED_LI_DTYPE_TEMPLATE_TABLE(ttable, nm::yale_each_with_indices, VALUE, VALUE)
1535
+ NAMED_LI_DTYPE_TEMPLATE_TABLE(ttable, nm::yale_storage::each_with_indices, VALUE, VALUE)
1581
1536
 
1582
1537
  return ttable[d][i](nmatrix);
1583
1538
  }
@@ -1588,7 +1543,7 @@ VALUE nm_yale_each_stored_with_indices(VALUE nmatrix) {
1588
1543
  nm::dtype_t d = NM_DTYPE(nmatrix);
1589
1544
  nm::itype_t i = NM_ITYPE(nmatrix);
1590
1545
 
1591
- NAMED_LI_DTYPE_TEMPLATE_TABLE(ttable, nm::yale_each_stored_with_indices, VALUE, VALUE)
1546
+ NAMED_LI_DTYPE_TEMPLATE_TABLE(ttable, nm::yale_storage::each_stored_with_indices, VALUE, VALUE)
1592
1547
 
1593
1548
  return ttable[d][i](nmatrix);
1594
1549
  }
@@ -1606,18 +1561,53 @@ char nm_yale_storage_set(STORAGE* storage, SLICE* slice, void* v) {
1606
1561
  return ttable[casted_storage->dtype][casted_storage->itype](casted_storage, slice, v);
1607
1562
  }
1608
1563
 
1564
+
1565
+ /*
1566
+ * Determine the number of non-diagonal non-zeros in a not-yet-created copy of a slice or matrix.
1567
+ */
1568
+ static size_t yale_count_slice_copy_ndnz(const YALE_STORAGE* s, size_t* offset, size_t* shape) {
1569
+ NAMED_LI_DTYPE_TEMPLATE_TABLE(ttable, nm::yale_storage::count_slice_copy_ndnz, size_t, const YALE_STORAGE*, size_t*, size_t*)
1570
+
1571
+ return ttable[s->dtype][s->itype](s, offset, shape);
1572
+ }
1573
+
1574
+
1609
1575
  /*
1610
- * C accessor for yale_storage::get, which returns a slice of YALE_STORAGE object by coppy
1576
+ * C accessor for yale_storage::get, which returns a slice of YALE_STORAGE object by copy
1611
1577
  *
1612
1578
  * Slicing-related.
1613
1579
  */
1614
1580
  void* nm_yale_storage_get(STORAGE* storage, SLICE* slice) {
1615
- NAMED_LI_DTYPE_TEMPLATE_TABLE(ttable, nm::yale_storage::get, void*, YALE_STORAGE* storage, SLICE* slice);
1616
- YALE_STORAGE* s = (YALE_STORAGE*)storage;
1581
+ YALE_STORAGE* casted_storage = (YALE_STORAGE*)storage;
1617
1582
 
1583
+ if (slice->single) {
1584
+ NAMED_LI_DTYPE_TEMPLATE_TABLE(elem_copy_table, nm::yale_storage::get_single, void*, YALE_STORAGE*, SLICE*)
1618
1585
 
1619
- YALE_STORAGE* casted_storage = (YALE_STORAGE*)storage;
1620
- return ttable[casted_storage->dtype][casted_storage->itype](casted_storage, slice);
1586
+ return elem_copy_table[casted_storage->dtype][casted_storage->itype](casted_storage, slice);
1587
+ } else {
1588
+ // Copy shape for yale construction
1589
+ size_t* shape = ALLOC_N(size_t, 2);
1590
+ shape[0] = slice->lengths[0];
1591
+ shape[1] = slice->lengths[1];
1592
+
1593
+ // only count ndnz if our slice is smaller, otherwise use the given value
1594
+ size_t ndnz = src_ndnz(casted_storage);
1595
+ if (shape[0] != casted_storage->shape[0] || shape[1] != casted_storage->shape[1])
1596
+ ndnz = yale_count_slice_copy_ndnz(casted_storage, slice->coords, shape); // expensive operation
1597
+
1598
+ size_t request_capacity = shape[0] + ndnz + 1; // capacity of new matrix
1599
+ YALE_STORAGE* ns = nm_yale_storage_create(casted_storage->dtype, shape, 2, request_capacity, casted_storage->itype);
1600
+
1601
+ // This check probably isn't necessary.
1602
+ if (ns->capacity < request_capacity)
1603
+ rb_raise(nm_eStorageTypeError, "conversion failed; capacity of %ld requested, max allowable is %ld", request_capacity, ns->capacity);
1604
+
1605
+ NAMED_LRI_DTYPE_TEMPLATE_TABLE(slice_copy_table, nm::yale_storage::slice_copy, void, YALE_STORAGE* ns, const YALE_STORAGE* s, size_t*, size_t*, nm::dtype_t)
1606
+
1607
+ slice_copy_table[ns->dtype][casted_storage->dtype][casted_storage->itype](ns, casted_storage, slice->coords, slice->lengths, casted_storage->dtype);
1608
+
1609
+ return ns;
1610
+ }
1621
1611
  }
1622
1612
 
1623
1613
  /*
@@ -1644,10 +1634,15 @@ static void nm_yale_storage_increment_ia_after(YALE_STORAGE* s, size_t ija_size,
1644
1634
  * for some set of coordinates.
1645
1635
  */
1646
1636
  void* nm_yale_storage_ref(STORAGE* storage, SLICE* slice) {
1647
- NAMED_LI_DTYPE_TEMPLATE_TABLE(ttable, nm::yale_storage::ref, void*, YALE_STORAGE* storage, SLICE* slice);
1648
-
1649
1637
  YALE_STORAGE* casted_storage = (YALE_STORAGE*)storage;
1650
- return ttable[casted_storage->dtype][casted_storage->itype](casted_storage, slice);
1638
+
1639
+ if (slice->single) {
1640
+ NAMED_LI_DTYPE_TEMPLATE_TABLE(elem_copy_table, nm::yale_storage::get_single, void*, YALE_STORAGE*, SLICE*)
1641
+ return elem_copy_table[casted_storage->dtype][casted_storage->itype](casted_storage, slice);
1642
+ } else {
1643
+ NAMED_LI_DTYPE_TEMPLATE_TABLE(ref_table, nm::yale_storage::ref, void*, YALE_STORAGE* storage, SLICE* slice)
1644
+ return ref_table[casted_storage->dtype][casted_storage->itype](casted_storage, slice);
1645
+ }
1651
1646
  }
1652
1647
 
1653
1648
 
@@ -1685,11 +1680,20 @@ size_t nm_yale_storage_get_size(const YALE_STORAGE* storage) {
1685
1680
  }
1686
1681
 
1687
1682
 
1683
+
1688
1684
  /*
1689
1685
  * Return a void pointer to the matrix's default value entry.
1690
1686
  */
1691
1687
  static void* default_value_ptr(const YALE_STORAGE* s) {
1692
- return reinterpret_cast<void*>(reinterpret_cast<char*>(s->a) + (s->shape[0] * DTYPE_SIZES[s->dtype]));
1688
+ return reinterpret_cast<void*>(reinterpret_cast<char*>(s->a) + (s->src->shape[0] * DTYPE_SIZES[s->dtype]));
1689
+ }
1690
+
1691
+ /*
1692
+ * Return the Ruby object at a given location in storage.
1693
+ */
1694
+ static VALUE obj_at(YALE_STORAGE* s, size_t k) {
1695
+ if (s->dtype == nm::RUBYOBJ) return reinterpret_cast<VALUE*>(s->a)[k];
1696
+ else return rubyobj_from_cval(reinterpret_cast<void*>(reinterpret_cast<char*>(s->a) + k * DTYPE_SIZES[s->dtype]), s->dtype).rval;
1693
1697
  }
1694
1698
 
1695
1699
 
@@ -1816,11 +1820,26 @@ YALE_STORAGE* nm_yale_storage_create(nm::dtype_t dtype, size_t* shape, size_t di
1816
1820
  void nm_yale_storage_delete(STORAGE* s) {
1817
1821
  if (s) {
1818
1822
  YALE_STORAGE* storage = (YALE_STORAGE*)s;
1823
+ if (storage->count-- == 1) {
1824
+ xfree(storage->shape);
1825
+ xfree(storage->offset);
1826
+ xfree(storage->ija);
1827
+ xfree(storage->a);
1828
+ xfree(storage);
1829
+ }
1830
+ }
1831
+ }
1832
+
1833
+ /*
1834
+ * Destructor for the yale storage ref
1835
+ */
1836
+ void nm_yale_storage_delete_ref(STORAGE* s) {
1837
+ if (s) {
1838
+ YALE_STORAGE* storage = (YALE_STORAGE*)s;
1839
+ nm_yale_storage_delete( reinterpret_cast<STORAGE*>(storage->src) );
1819
1840
  xfree(storage->shape);
1820
1841
  xfree(storage->offset);
1821
- xfree(storage->ija);
1822
- xfree(storage->a);
1823
- xfree(storage);
1842
+ xfree(s);
1824
1843
  }
1825
1844
  }
1826
1845
 
@@ -1850,6 +1869,7 @@ void nm_yale_storage_mark(void* storage_base) {
1850
1869
  }
1851
1870
  }
1852
1871
 
1872
+
1853
1873
  /*
1854
1874
  * Allocates and initializes the basic struct (but not the IJA or A vectors).
1855
1875
  */
@@ -1867,6 +1887,7 @@ static YALE_STORAGE* alloc(nm::dtype_t dtype, size_t* shape, size_t dim, nm::ity
1867
1887
  s->dim = dim;
1868
1888
  s->itype = nm_yale_storage_itype_by_shape(shape);
1869
1889
  s->src = reinterpret_cast<STORAGE*>(s);
1890
+ s->count = 1;
1870
1891
 
1871
1892
  // See if a higher itype has been requested.
1872
1893
  if (static_cast<int8_t>(s->itype) < static_cast<int8_t>(min_itype))
@@ -1935,7 +1956,7 @@ static VALUE nm_a(int argc, VALUE* argv, VALUE self) {
1935
1956
  }
1936
1957
  VALUE ary = rb_ary_new4(size, vals);
1937
1958
 
1938
- for (size_t i = size; i < s->capacity; ++i)
1959
+ for (size_t i = size; i < reinterpret_cast<YALE_STORAGE*>(s->src)->capacity; ++i)
1939
1960
  rb_ary_push(ary, Qnil);
1940
1961
 
1941
1962
  return ary;
@@ -2008,7 +2029,7 @@ static VALUE nm_lu(VALUE self) {
2008
2029
 
2009
2030
  VALUE ary = rb_ary_new4(size - s->shape[0] - 1, vals);
2010
2031
 
2011
- for (size_t i = size; i < s->capacity; ++i)
2032
+ for (size_t i = size; i < reinterpret_cast<YALE_STORAGE*>(s->src)->capacity; ++i)
2012
2033
  rb_ary_push(ary, Qnil);
2013
2034
 
2014
2035
  return ary;
@@ -2053,7 +2074,7 @@ static VALUE nm_ja(VALUE self) {
2053
2074
 
2054
2075
  VALUE ary = rb_ary_new4(size - s->shape[0] - 1, vals);
2055
2076
 
2056
- for (size_t i = size; i < s->capacity; ++i)
2077
+ for (size_t i = size; i < reinterpret_cast<YALE_STORAGE*>(s->src)->capacity; ++i)
2057
2078
  rb_ary_push(ary, Qnil);
2058
2079
 
2059
2080
  return ary;
@@ -2083,7 +2104,7 @@ static VALUE nm_ija(int argc, VALUE* argv, VALUE self) {
2083
2104
 
2084
2105
  VALUE ary = rb_ary_new4(size, vals);
2085
2106
 
2086
- for (size_t i = size; i < s->capacity; ++i)
2107
+ for (size_t i = size; i < reinterpret_cast<YALE_STORAGE*>(s->src)->capacity; ++i)
2087
2108
  rb_ary_push(ary, Qnil);
2088
2109
 
2089
2110
  return ary;
@@ -2127,9 +2148,7 @@ static VALUE nm_nd_row(int argc, VALUE* argv, VALUE self) {
2127
2148
  size_t nextpos = FIX2INT(rubyobj_from_cval_by_itype((char*)(s->ija) + ITYPE_SIZES[itype]*(i+1), itype).rval);
2128
2149
  size_t diff = nextpos - pos;
2129
2150
 
2130
- //std::cerr << "diff = " << diff << "\tpos = " << pos << "\tnextpos = " << nextpos << std::endl;
2131
-
2132
- VALUE ret; // HERE
2151
+ VALUE ret;
2133
2152
  if (keys) {
2134
2153
  ret = rb_ary_new3(diff);
2135
2154
 
@@ -2212,7 +2231,7 @@ VALUE nm_vector_set(int argc, VALUE* argv, VALUE self) { //, VALUE i_, VALUE jv,
2212
2231
 
2213
2232
  char ins_type = nm_yale_storage_vector_insert(s, pos, j, vals, len, false, dtype, itype);
2214
2233
  nm_yale_storage_increment_ia_after(s, s->shape[0], i, len, itype);
2215
- s->ndnz += len;
2234
+ reinterpret_cast<YALE_STORAGE*>(s->src)->ndnz += len;
2216
2235
 
2217
2236
  // Return the updated position
2218
2237
  pos += len;