nmatrix 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. data/.autotest +23 -0
  2. data/.gemtest +0 -0
  3. data/Gemfile +7 -0
  4. data/History.txt +6 -0
  5. data/LICENSE.txt +21 -0
  6. data/Manifest.txt +51 -0
  7. data/README.rdoc +63 -0
  8. data/Rakefile +154 -0
  9. data/ext/nmatrix/cblas.c +150 -0
  10. data/ext/nmatrix/dense.c +307 -0
  11. data/ext/nmatrix/dense/blas_header.template.c +52 -0
  12. data/ext/nmatrix/dense/elementwise.template.c +107 -0
  13. data/ext/nmatrix/dense/gemm.template.c +159 -0
  14. data/ext/nmatrix/dense/gemv.template.c +130 -0
  15. data/ext/nmatrix/dense/rationalmath.template.c +68 -0
  16. data/ext/nmatrix/depend +18 -0
  17. data/ext/nmatrix/extconf.rb +143 -0
  18. data/ext/nmatrix/generator.rb +594 -0
  19. data/ext/nmatrix/generator/syntax_tree.rb +481 -0
  20. data/ext/nmatrix/list.c +774 -0
  21. data/ext/nmatrix/nmatrix.c +1977 -0
  22. data/ext/nmatrix/nmatrix.h +912 -0
  23. data/ext/nmatrix/rational.c +98 -0
  24. data/ext/nmatrix/yale.c +726 -0
  25. data/ext/nmatrix/yale/complexmath.template.c +71 -0
  26. data/ext/nmatrix/yale/elementwise.template.c +46 -0
  27. data/ext/nmatrix/yale/elementwise_op.template.c +73 -0
  28. data/ext/nmatrix/yale/numbmm.template.c +94 -0
  29. data/ext/nmatrix/yale/smmp1.template.c +21 -0
  30. data/ext/nmatrix/yale/smmp1_header.template.c +38 -0
  31. data/ext/nmatrix/yale/smmp2.template.c +43 -0
  32. data/ext/nmatrix/yale/smmp2_header.template.c +46 -0
  33. data/ext/nmatrix/yale/sort_columns.template.c +56 -0
  34. data/ext/nmatrix/yale/symbmm.template.c +54 -0
  35. data/ext/nmatrix/yale/transp.template.c +68 -0
  36. data/lib/array.rb +67 -0
  37. data/lib/nmatrix.rb +263 -0
  38. data/lib/string.rb +65 -0
  39. data/spec/nmatrix_spec.rb +395 -0
  40. data/spec/nmatrix_yale_spec.rb +239 -0
  41. data/spec/nvector_spec.rb +43 -0
  42. data/spec/syntax_tree_spec.rb +46 -0
  43. metadata +150 -0
data/ext/nmatrix/nmatrix.c
@@ -0,0 +1,1977 @@
+ /////////////////////////////////////////////////////////////////////
+ // = NMatrix
+ //
+ // A linear algebra library for scientific computation in Ruby.
+ // NMatrix is part of SciRuby.
+ //
+ // NMatrix was originally inspired by and derived from NArray, by
+ // Masahiro Tanaka: http://narray.rubyforge.org
+ //
+ // == Copyright Information
+ //
+ // SciRuby is Copyright (c) 2010 - 2012, Ruby Science Foundation
+ // NMatrix is Copyright (c) 2012, Ruby Science Foundation
+ //
+ // Please see LICENSE.txt for additional copyright notices.
+ //
+ // == Contributing
+ //
+ // By contributing source code to SciRuby, you agree to be bound by
+ // our Contributor Agreement:
+ //
+ // * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement
+ //
+ // == nmatrix.c
+ //
+ #ifndef NMATRIX_C
+ # define NMATRIX_C
+
+ #include <ruby.h>
+
+ #include "nmatrix.h"
+ //#include "types.h"
+
+ VALUE cNMatrix, cNVector;
+ VALUE nm_eDataTypeError, nm_eStorageTypeError;
+
+ ID nm_id_real, nm_id_imag;
+ ID nm_id_numer, nm_id_denom;
+ ID nm_id_transpose, nm_id_no_transpose, nm_id_complex_conjugate; // cblas
+ ID nm_id_list, nm_id_dense;
+ ID nm_id_mult, nm_id_multeq;
+ ID nm_id_add;
+
+ #include "dtypes.c"
+
+ #ifdef BENCHMARK
+ double get_time() {
+   struct timeval t;
+   struct timezone tzp;
+   gettimeofday(&t, &tzp);
+   return t.tv_sec + t.tv_usec*1e-6;
+ }
+ #endif
+
+
+ const char *nm_stypestring[] = {
+   "dense",
+   "list",
+   "yale",
+   "stypes"
+ };
+
+
+ nm_delete_t DeleteFuncs = {
+   delete_dense_storage,
+   delete_list_storage,
+   delete_yale_storage
+ };
+
+
+ nm_mark_t MarkFuncs = {
+   mark_dense_storage,
+   mark_list_storage,
+   mark_yale_storage
+ };
+
+
+ nm_gemv_t GemvFuncs = {
+   NULL,
+   cblas_bgemv_,
+   cblas_i8gemv_,
+   cblas_i16gemv_,
+   cblas_i32gemv_,
+   cblas_i64gemv_,
+   cblas_sgemv_,
+   cblas_dgemv_,
+   cblas_cgemv_,
+   cblas_zgemv_,
+   cblas_r32gemv_,
+   cblas_r64gemv_,
+   cblas_r128gemv_,
+   NULL
+ };
+
+
+ nm_gemm_t GemmFuncs = { // by NM_TYPES
+   NULL,
+   cblas_bgemm_,
+   cblas_i8gemm_,
+   cblas_i16gemm_,
+   cblas_i32gemm_,
+   cblas_i64gemm_,
+   cblas_sgemm_,
+   cblas_dgemm_,
+   cblas_cgemm_,
+   cblas_zgemm_,
+   cblas_r32gemm_,
+   cblas_r64gemm_,
+   cblas_r128gemm_,
+   cblas_vgemm_
+ };
+
+ static void TransposeTypeErr(y_size_t n, y_size_t m, YALE_PARAM A, YALE_PARAM B, bool move) {
+   rb_raise(nm_eDataTypeError, "illegal operation with this matrix type");
+ }
+
+
+ // First dimension is dtype, second dimension is index dtype (so lots of nulls)
+ nm_smmp_transpose_t SparseTransposeFuncs = {
+   {TransposeTypeErr, TransposeTypeErr, TransposeTypeErr, TransposeTypeErr,  TransposeTypeErr,  TransposeTypeErr}, // NM_NONE
+   {TransposeTypeErr, TransposeTypeErr, i8_b_transp_,     i16_b_transp_,     i32_b_transp_,     i64_b_transp_},    // NM_BYTE
+   {TransposeTypeErr, TransposeTypeErr, i8_i8_transp_,    i16_i8_transp_,    i32_i8_transp_,    i64_i8_transp_},   // NM_INT8
+   {TransposeTypeErr, TransposeTypeErr, i8_i16_transp_,   i16_i16_transp_,   i32_i16_transp_,   i64_i16_transp_},  // NM_INT16
+   {TransposeTypeErr, TransposeTypeErr, i8_i32_transp_,   i16_i32_transp_,   i32_i32_transp_,   i64_i32_transp_},  // NM_INT32
+   {TransposeTypeErr, TransposeTypeErr, i8_i64_transp_,   i16_i64_transp_,   i32_i64_transp_,   i64_i64_transp_},  // NM_INT64
+   {TransposeTypeErr, TransposeTypeErr, i8_f32_transp_,   i16_f32_transp_,   i32_f32_transp_,   i64_f32_transp_},  // NM_FLOAT32
+   {TransposeTypeErr, TransposeTypeErr, i8_f64_transp_,   i16_f64_transp_,   i32_f64_transp_,   i64_f64_transp_},  // NM_FLOAT64
+   {TransposeTypeErr, TransposeTypeErr, i8_c64_transp_,   i16_c64_transp_,   i32_c64_transp_,   i64_c64_transp_},  // NM_COMPLEX64
+   {TransposeTypeErr, TransposeTypeErr, i8_c128_transp_,  i16_c128_transp_,  i32_c128_transp_,  i64_c128_transp_}, // NM_COMPLEX128
+   {TransposeTypeErr, TransposeTypeErr, i8_r32_transp_,   i16_r32_transp_,   i32_r32_transp_,   i64_r32_transp_},  // NM_RATIONAL32
+   {TransposeTypeErr, TransposeTypeErr, i8_r64_transp_,   i16_r64_transp_,   i32_r64_transp_,   i64_r64_transp_},  // NM_RATIONAL64
+   {TransposeTypeErr, TransposeTypeErr, i8_r128_transp_,  i16_r128_transp_,  i32_r128_transp_,  i64_r128_transp_}, // NM_RATIONAL128
+   {TransposeTypeErr, TransposeTypeErr, i8_v_transp_,     i16_v_transp_,     i32_v_transp_,     i64_v_transp_}     // NM_ROBJ
+ };
+
+ /*
+ // Currently commented out because dense_transpose_generic is about the same speed. Let's resurrect this when we write
+ // an in-place transpose (e.g., transpose!).
+
+ static void DenseTransTypeErr(int M, int N, void* A, int lda, void* B, int ldb, bool move) {
+   rb_raise(nm_eDataTypeError, "illegal operation with this matrix type");
+ }
+
+ nm_dense_transpose_t DenseTransposeFuncs = {
+   DenseTransTypeErr,
+   btransp,
+   i8transp,
+   i16transp,
+   i32transp,
+   i64transp,
+   f32transp,
+   f64transp,
+   c64transp,
+   c128transp,
+   r32transp,
+   r64transp,
+   r128transp,
+   vtransp
+ }; */
+
+
+ static void SmmpTypeErr(y_size_t n, y_size_t m, YALE_PARAM A, YALE_PARAM B, YALE_PARAM C) {
+   rb_raise(nm_eDataTypeError, "illegal operation with this matrix type");
+ }
+
+ // First dimension is dtype, second dimension is index dtype (so lots of nulls)
+ nm_smmp_t SmmpFuncs = {
+   {SmmpTypeErr, SmmpTypeErr, SmmpTypeErr,  SmmpTypeErr,   SmmpTypeErr,   SmmpTypeErr},   // NM_NONE
+   {SmmpTypeErr, SmmpTypeErr, i8_b_smmp,    i16_b_smmp,    i32_b_smmp,    i64_b_smmp},    // NM_BYTE
+   {SmmpTypeErr, SmmpTypeErr, i8_i8_smmp,   i16_i8_smmp,   i32_i8_smmp,   i64_i8_smmp},   // NM_INT8
+   {SmmpTypeErr, SmmpTypeErr, i8_i16_smmp,  i16_i16_smmp,  i32_i16_smmp,  i64_i16_smmp},  // NM_INT16
+   {SmmpTypeErr, SmmpTypeErr, i8_i32_smmp,  i16_i32_smmp,  i32_i32_smmp,  i64_i32_smmp},  // NM_INT32
+   {SmmpTypeErr, SmmpTypeErr, i8_i64_smmp,  i16_i64_smmp,  i32_i64_smmp,  i64_i64_smmp},  // NM_INT64
+   {SmmpTypeErr, SmmpTypeErr, i8_f32_smmp,  i16_f32_smmp,  i32_f32_smmp,  i64_f32_smmp},  // NM_FLOAT32
+   {SmmpTypeErr, SmmpTypeErr, i8_f64_smmp,  i16_f64_smmp,  i32_f64_smmp,  i64_f64_smmp},  // NM_FLOAT64
+   {SmmpTypeErr, SmmpTypeErr, i8_c64_smmp,  i16_c64_smmp,  i32_c64_smmp,  i64_c64_smmp},  // NM_COMPLEX64
+   {SmmpTypeErr, SmmpTypeErr, i8_c128_smmp, i16_c128_smmp, i32_c128_smmp, i64_c128_smmp}, // NM_COMPLEX128
+   {SmmpTypeErr, SmmpTypeErr, i8_r32_smmp,  i16_r32_smmp,  i32_r32_smmp,  i64_r32_smmp},  // NM_RATIONAL32
+   {SmmpTypeErr, SmmpTypeErr, i8_r64_smmp,  i16_r64_smmp,  i32_r64_smmp,  i64_r64_smmp},  // NM_RATIONAL64
+   {SmmpTypeErr, SmmpTypeErr, i8_r128_smmp, i16_r128_smmp, i32_r128_smmp, i64_r128_smmp}, // NM_RATIONAL128
+   {SmmpTypeErr, SmmpTypeErr, i8_v_smmp,    i16_v_smmp,    i32_v_smmp,    i64_v_smmp}     // NM_ROBJ
+ };
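// --- Editor's sketch, not part of the diff: the dispatch-by-dtype pattern. ---
// The tables above (DeleteFuncs, GemmFuncs, SmmpFuncs, ...) replace per-call
// switch statements with one indexed function-pointer lookup; the 2-D tables
// add a second index for the Yale index dtype. A minimal, compilable analogue,
// where all demo_* names are hypothetical and not symbols from nmatrix:

#include <stdio.h>

typedef void (*demo_op_t)(void);

static void demo_int_op(void)   { printf("int op\n");   }
static void demo_float_op(void) { printf("float op\n"); }

// One row per dtype, one column per index dtype -- same shape as SmmpFuncs.
static demo_op_t DemoFuncs[2][2] = {
  { demo_int_op,   demo_int_op   },
  { demo_float_op, demo_float_op }
};

static void demo_dispatch(int dtype, int itype) {
  DemoFuncs[dtype][itype]();  // a single indirect call, no switch needed
}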
+
+
+ static inline DENSE_PARAM cblas_params_for_multiply(const DENSE_STORAGE* left, const DENSE_STORAGE* right, const DENSE_STORAGE* result, bool vector) {
+   DENSE_PARAM p;
+
+   p.A = left->elements;
+   p.B = right->elements;  // for vector, this is actually x
+   p.C = result->elements; // vector Y
+
+   p.M   = left->shape[0];
+   p.lda = left->shape[1];
+
+   if (vector) {
+     p.N = left->shape[1];
+
+     p.ldb = 1; // incX
+     p.ldc = 1; // incY
+   } else {
+     p.N = right->shape[1];
+     p.K = left->shape[1];
+
+     p.ldb = right->shape[1];
+     p.ldc = result->shape[1];
+   }
+
+   switch(left->dtype) {
+   case NM_FLOAT32:
+   case NM_FLOAT64:
+     p.alpha.d[0] = 1.0;
+     p.beta.d[0]  = 0.0;
+     break;
+
+   case NM_COMPLEX64:
+     p.alpha.c[0].r = 1.0;
+     p.alpha.c[0].i = 0.0;
+     p.beta.c[0].r  = 0.0;
+     p.beta.c[0].i  = 0.0;
+     break;
+
+   case NM_COMPLEX128:
+     p.alpha.z.r = 1.0;
+     p.alpha.z.i = 0.0;
+     p.beta.z.r  = 0.0;
+     p.beta.z.i  = 0.0;
+     break;
+
+   case NM_BYTE:
+     p.alpha.b[0] = 1;
+     p.beta.b[0]  = 0;
+     break;
+
+   case NM_INT8:
+   case NM_INT16:
+   case NM_INT32:
+   case NM_INT64:
+     p.alpha.i[0] = 1;
+     p.beta.i[0]  = 0;
+     break;
+
+   case NM_RATIONAL32:
+     p.alpha.r[0].n = 1;
+     p.alpha.r[0].d = 1;
+     p.beta.r[0].n  = 0;
+     p.beta.r[0].d  = 1;
+     break;
+
+   case NM_RATIONAL64:
+     p.alpha.ra[0].n = 1;
+     p.alpha.ra[0].d = 1;
+     p.beta.ra[0].n  = 0;
+     p.beta.ra[0].d  = 1;
+     break;
+
+   case NM_RATIONAL128:
+     p.alpha.rat.n = 1;
+     p.alpha.rat.d = 1;
+     p.beta.rat.n  = 0;
+     p.beta.rat.d  = 1;
+     break;
+
+   case NM_ROBJ:
+     p.alpha.v[0] = INT2FIX(1);
+     p.beta.v[0]  = RUBY_ZERO;
+     break;
+
+   default:
+     rb_raise(nm_eDataTypeError, "unexpected dtype");
+
+   }
+
+   return p;
+ }
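// --- Editor's sketch, not part of the diff: row-major leading dimensions. ---
// For C (MxN) = A (MxK) * B (KxN) in unpadded row-major storage, the function
// above derives each leading dimension from the operand's row length, and in
// the GEMV case reuses ldb/ldc as the vector strides incX/incY = 1. A compact
// restatement of that convention (demo_* names are hypothetical):

struct demo_gemm_dims { int M, N, K, lda, ldb, ldc; };

static struct demo_gemm_dims demo_gemm_dims_for(int rowsA, int colsA, int colsB) {
  struct demo_gemm_dims d;
  d.M = rowsA;    // rows of A and C
  d.K = colsA;    // columns of A == rows of B
  d.N = colsB;    // columns of B and C
  d.lda = colsA;  // row length of A
  d.ldb = colsB;  // row length of B
  d.ldc = colsB;  // row length of C
  return d;
}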
+
+
+ static NMATRIX* multiply_matrix_dense_casted(STORAGE_PAIR casted_storage, size_t* resulting_shape, bool vector) {
+   DENSE_STORAGE *left  = (DENSE_STORAGE*)(casted_storage.left),
+                 *right = (DENSE_STORAGE*)(casted_storage.right),
+                 *result;
+
+   // We can safely get dtype from the casted matrices; post-condition of binary_storage_cast_alloc is that dtype is the
+   // same for left and right.
+   int8_t dtype = left->dtype;
+
+   // Create result storage.
+   result = create_dense_storage(dtype, resulting_shape, 2, NULL, 0);
+
+   // Do the multiplication
+   if (vector) GemvFuncs[dtype](CblasRowMajor, CblasNoTrans, cblas_params_for_multiply(left, right, result, true));
+   else        GemmFuncs[dtype](CblasRowMajor, CblasNoTrans, CblasNoTrans, cblas_params_for_multiply(left, right, result, false));
+
+   return nm_create(S_DENSE, result);
+ }
+
+
+ static NMATRIX* multiply_matrix_yale_casted(STORAGE_PAIR casted_storage, size_t* resulting_shape, bool vector) {
+   YALE_STORAGE *left  = (YALE_STORAGE*)(casted_storage.left),
+                *right = (YALE_STORAGE*)(casted_storage.right),
+                *result;
+   YALE_PARAM A, B, C;
+
+   // We can safely get dtype from the casted matrices; post-condition of binary_storage_cast_alloc is that dtype is the
+   // same for left and right.
+   int8_t dtype = left->dtype;
+
+   // Create result storage.
+   result = create_yale_storage(dtype, resulting_shape, 2, left->capacity + right->capacity);
+   init_yale_storage(result);
+
+   // Set multiplication parameters
+   A.ia = A.ja = left->ija;
+   A.a  = left->a;
+   B.ia = B.ja = right->ija;
+   B.a  = right->a;
+   C.ia = C.ja = result->ija;
+   C.a  = result->a;
+
+   A.diag = B.diag = C.diag = true;
+
+   // Do the multiplication
+   SmmpFuncs[dtype][left->index_dtype](result->shape[0], result->shape[1], A, B, C);
+
+   return nm_create(S_YALE, result);
+ }
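// --- Editor's note, not part of the diff: the Yale parameters above. ---
// A.ia = A.ja = left->ija works because this "new Yale" layout keeps the row
// pointers and the column indices in a single ija array, with the diagonal
// stored separately at the front of a (hence A.diag = true). An assumed
// illustration for the 3x3 matrix [[1,0,2],[0,3,0],[0,4,5]]:

static const int    demo_yale_ija[] = { 4, 5, 5, 6,  2, 1 };  // [0..3]: row pointers, [4..]: column indices
static const double demo_yale_a[]   = { 1, 3, 5, 0,  2, 4 };  // [0..2]: diagonal, [4..]: off-diagonal values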
+
+
+ static NMATRIX* multiply_matrix_list_casted(STORAGE_PAIR casted_storage, size_t* resulting_shape) {
+   free(resulting_shape); // free before raising; rb_raise does not return
+   rb_raise(rb_eNotImpError, "multiplication not implemented for list-of-list matrices");
+   return NULL;
+ }
+
+
+ nm_matrix_multiply_op_t CastedMultiplyFuncs = {
+   multiply_matrix_dense_casted,
+   multiply_matrix_list_casted,
+   multiply_matrix_yale_casted
+ };
+
+
+ nm_d_elementwise_binary_op_t DenseElementwiseFuncs = { // only for dense!
+   NULL,
+   nm_d_b_elementwise,
+   nm_d_i8_elementwise,
+   nm_d_i16_elementwise,
+   nm_d_i32_elementwise,
+   nm_d_i64_elementwise,
+   nm_d_f32_elementwise,
+   nm_d_f64_elementwise,
+   nm_d_c64_elementwise,
+   nm_d_c128_elementwise,
+   nm_d_r32_elementwise,
+   nm_d_r64_elementwise,
+   nm_d_r128_elementwise,
+   nm_d_v_elementwise,
+   NULL
+ };
+
+ static void EwTypeErr(y_size_t n, enum NMatrix_Ops op, void* ija, void* ijb, void* ijc, void* a, void* b, void* c) {
+   rb_raise(nm_eDataTypeError, "illegal operation with this matrix type");
+ }
+
+ // First dimension is dtype, second dimension is index dtype (so lots of nulls)
+ nm_y_elementwise_binary_op_t YaleElementwiseFuncs = { // only for yale!
+   {EwTypeErr, EwTypeErr, EwTypeErr,  EwTypeErr,   EwTypeErr,   EwTypeErr},
+   {EwTypeErr, EwTypeErr, EwTypeErr,  EwTypeErr,   EwTypeErr,   EwTypeErr},
+   {EwTypeErr, EwTypeErr, i8_i8_ew,   i16_i8_ew,   i32_i8_ew,   i64_i8_ew},
+   {EwTypeErr, EwTypeErr, i8_i16_ew,  i16_i16_ew,  i32_i16_ew,  i64_i16_ew},
+   {EwTypeErr, EwTypeErr, i8_i32_ew,  i16_i32_ew,  i32_i32_ew,  i64_i32_ew},
+   {EwTypeErr, EwTypeErr, i8_i64_ew,  i16_i64_ew,  i32_i64_ew,  i64_i64_ew},
+   {EwTypeErr, EwTypeErr, i8_f32_ew,  i16_f32_ew,  i32_f32_ew,  i64_f32_ew},
+   {EwTypeErr, EwTypeErr, i8_f64_ew,  i16_f64_ew,  i32_f64_ew,  i64_f64_ew},
+   {EwTypeErr, EwTypeErr, i8_c64_ew,  i16_c64_ew,  i32_c64_ew,  i64_c64_ew},
+   {EwTypeErr, EwTypeErr, i8_c128_ew, i16_c128_ew, i32_c128_ew, i64_c128_ew},
+   {EwTypeErr, EwTypeErr, i8_r32_ew,  i16_r32_ew,  i32_r32_ew,  i64_r32_ew},
+   {EwTypeErr, EwTypeErr, i8_r64_ew,  i16_r64_ew,  i32_r64_ew,  i64_r64_ew},
+   {EwTypeErr, EwTypeErr, i8_r128_ew, i16_r128_ew, i32_r128_ew, i64_r128_ew},
+   {EwTypeErr, EwTypeErr, i8_v_ew,    i16_v_ew,    i32_v_ew,    i64_v_ew}
+ };
+
+
+ static NMATRIX* elementwise_dense_casted(STORAGE_PAIR casted_storage, char op) {
+   DENSE_STORAGE *left  = (DENSE_STORAGE*)(casted_storage.left),
+                 *right = (DENSE_STORAGE*)(casted_storage.right),
+                 *result;
+
+   // We can safely get dtype from the casted matrices; post-condition of binary_storage_cast_alloc is that dtype is the
+   // same for left and right.
+   size_t i;
+   int8_t dtype = left->dtype;
+
+   // Setup matrix shape for result
+   size_t* shape = ALLOC_N(size_t, left->rank);
+   for (i = 0; i < left->rank; ++i) shape[i] = left->shape[i];
+
+   // Create result storage.
+   result = create_dense_storage(dtype, shape, left->rank, NULL, 0);
+
+   // Do the operation
+   DenseElementwiseFuncs[dtype](left->elements, right->elements, result->elements, count_dense_storage_elements(result), op);
+
+   return nm_create(S_DENSE, result);
+ }
+
+
+ static NMATRIX* elementwise_list_casted(STORAGE_PAIR casted_storage, char op) {
+   rb_raise(rb_eNotImpError, "elementwise operations not implemented for list-of-list matrices");
+   return NULL;
+ }
+
+
+ static NMATRIX* elementwise_yale_casted(STORAGE_PAIR casted_storage, char op) {
+   YALE_STORAGE *left  = (YALE_STORAGE*)(casted_storage.left),
+                *right = (YALE_STORAGE*)(casted_storage.right);
+   YALE_STORAGE *result = create_merged_yale_storage(left, right);
+
+   fprintf(stderr, "result: %d, %d\n", result->dtype, result->index_dtype);
+
+   //fprintf(stderr, "Remember to fix elementwise for yale!\n");
+   YaleElementwiseFuncs[result->dtype][result->index_dtype](result->shape[0], result->shape[1], op, left->ija, right->ija, result->ija, left->a, right->a, result->a);
+
+   return nm_create(S_YALE, result);
+ }
+
+
+ nm_elementwise_binary_op_casted_t CastedElementwiseFuncs = {
+   elementwise_dense_casted,
+   elementwise_list_casted,
+   elementwise_yale_casted
+ };
+
+
+ nm_compare_t EqEqFuncs = {
+   dense_storage_eqeq,
+   list_storage_eqeq,
+   yale_storage_eqeq
+ };
+
+
+ static void nm_delete(NMATRIX* mat) {
+   DeleteFuncs[mat->stype](mat->storage);
+ }
+
+
+ static STORAGE* nm_dense_new(size_t* shape, size_t rank, int8_t dtype, void* init_val, size_t init_val_len, VALUE self) {
+   return (STORAGE*)(create_dense_storage(dtype, shape, rank, init_val, init_val_len));
+ }
+
+ static STORAGE* nm_list_new(size_t* shape, size_t rank, int8_t dtype, void* init_val, size_t init_val_len, VALUE self) {
+   if (init_val_len > 1) {
+     rb_raise(rb_eArgError, "list storage needs a single initial value, not an array");
+     return NULL;
+   }
+   return (STORAGE*)(create_list_storage(dtype, shape, rank, init_val));
+ }
+
+
+ static STORAGE* nm_yale_new(size_t* shape, size_t rank, int8_t dtype, void* init_val, size_t init_val_len, VALUE self) {
+   YALE_STORAGE* s;
+
+   if (init_val_len > 1) {
+     rb_raise(rb_eArgError, "yale storage needs initial size, not initial value");
+     return NULL;
+   }
+
+   s = create_yale_storage(dtype, shape, rank, *(size_t*)init_val);
+   free(init_val);
+
+   if (!s) rb_raise(rb_eNoMemError, "Yale allocation failed"); // check before touching s
+   init_yale_storage(s);
+
+   return (STORAGE*)(s);
+   //return Data_Wrap_Struct(self, NULL, nm_delete, matrix);
+ }
+
+
+ nm_create_storage_t CreateFuncs = {
+   nm_dense_new,
+   nm_list_new,
+   nm_yale_new
+ };
+
+
+ nm_cast_copy_storage_t CastCopyFuncs = {
+   cast_copy_dense_storage,
+   cast_copy_list_storage,
+   cast_copy_yale_storage
+ };
+
+
+
+ nm_scast_copy_storage_t ScastCopyFuncs = {
+   {cast_copy_dense_storage, scast_copy_dense_list,  scast_copy_dense_yale},
+   {scast_copy_list_dense,   cast_copy_list_storage, scast_copy_list_yale},
+   {scast_copy_yale_dense,   scast_copy_yale_list,   cast_copy_yale_storage}
+ };
+
+
+ nm_stype_ref_t RefFuncs = {
+   dense_storage_get,
+   list_storage_get,
+   yale_storage_ref
+ };
+
+
+ VALUE nm_dense_set(STORAGE* s, size_t* coords, VALUE val) {
+   void* v = ALLOCA_N(char, nm_sizeof[s->dtype]);
+   SetFuncs[s->dtype][NM_ROBJ](1, v, 0, &val, 0);
+   dense_storage_set( (DENSE_STORAGE*)s, coords, v );
+   return val;
+ }
+
+
+ // Should work exactly the same as nm_dense_set.
+ VALUE nm_yale_set(STORAGE* s, size_t* coords, VALUE val) {
+   void* v = ALLOCA_N(char, nm_sizeof[s->dtype]);
+   SetFuncs[s->dtype][NM_ROBJ](1, v, 0, &val, 0);
+   yale_storage_set( (YALE_STORAGE*)s, coords, v );
+   return val;
+ }
+
+
+ // TODO: Why can't you be more like your brothers, nm_dense_set and nm_yale_set?
+ VALUE nm_list_set(STORAGE* s, size_t* coords, VALUE val) {
+   void *v = ALLOC_N(char, nm_sizeof[s->dtype]), *rm;
+   LIST_STORAGE* ls = (LIST_STORAGE*)s;
+
+   //fprintf(stderr, " create_val: %p\n", v);
+
+   SetFuncs[s->dtype][NM_ROBJ](1, v, 0, &val, 0);
+
+   if (!memcmp(ls->default_val, v, nm_sizeof[s->dtype])) {
+     // User asked to insert default_value, which is actually node *removal*.
+     // So let's do that instead.
+
+     rm = list_storage_remove( ls, coords );
+
+     //if (rm) fprintf(stderr, " remove_val: %p\n", rm);
+
+     if (rm) free(rm);
+     return val;
+
+   } else if (list_storage_insert( ls, coords, v )) return val;
+   return Qnil;
+   // No need to free; the list keeps v.
+ }
+
+
+
+ nm_stype_ins_t InsFuncs = {
+   nm_dense_set,
+   nm_list_set,
+   nm_yale_set,
+ };
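// --- Editor's sketch, not part of the diff: default values in sparse lists. ---
// nm_list_set() above deletes a node when the caller stores the default value,
// because a list matrix represents only non-default entries; keeping an
// explicit node equal to the default would be redundant. The same invariant in
// miniature (demo_* names are hypothetical):

#include <string.h>

typedef struct { double val; int present; } demo_cell;

static void demo_sparse_set(demo_cell* cell, double v, double default_val) {
  if (v == default_val) memset(cell, 0, sizeof(*cell)); // "remove" the node
  else { cell->val = v; cell->present = 1; }            // insert or overwrite
}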
+
+
+
+ // Converts a typestring to a typecode for storage. Only looks at the first three characters.
+ int8_t nm_stypestring_to_stype(VALUE str) {
+   int8_t i;
+   for (i = 0; i < S_TYPES; ++i)
+     if ( !strncmp(RSTRING_PTR(str), nm_stypestring[i], 3) ) return i;
+   return S_DENSE;
+ }
+
+ int8_t nm_stypesymbol_to_stype(VALUE sym) {
+   int8_t i;
+   for (i = 0; i < S_TYPES; ++i)
+     if (SYM2ID(sym) == rb_intern(nm_stypestring[i])) return i;
+   return S_DENSE;
+ }
+
+
+ int8_t nm_dtypestring_to_dtype(VALUE str) {
+   int8_t i;
+   for (i = 0; i < NM_TYPES; ++i)
+     if ( !strncmp(RSTRING_PTR(str), nm_dtypestring[i], RSTRING_LEN(str)) ) return i;
+   return NM_NONE;
+ }
+
+ int8_t nm_dtypesymbol_to_dtype(VALUE sym) {
+   int8_t i;
+   for (i = 0; i < NM_TYPES; ++i)
+     if (SYM2ID(sym) == rb_intern(nm_dtypestring[i])) return i;
+   return NM_NONE;
+ }
+
+
+ // TODO: Probably needs some work for Bignum.
+ int8_t nm_guess_dtype(VALUE v) {
+   switch(TYPE(v)) {
+   case T_TRUE:
+   case T_FALSE:
+     return NM_BYTE;
+   case T_STRING:
+     if (RSTRING_LEN(v) == 1) return NM_BYTE;
+     else return NM_NONE;
+
+ #if SIZEOF_INT == 8
+   case T_FIXNUM:
+     return NM_INT64;
+   case T_RATIONAL:
+     return NM_RATIONAL128;
+ #else
+ # if SIZEOF_INT == 4
+   case T_FIXNUM:
+     return NM_INT32;
+   case T_RATIONAL:
+     return NM_RATIONAL64;
+ # else
+   case T_FIXNUM:
+     return NM_INT16;
+   case T_RATIONAL:
+     return NM_RATIONAL32;
+ # endif
+ #endif
+
+   case T_BIGNUM:
+     return NM_INT64;
+
+ #if SIZEOF_FLOAT == 4
+   case T_COMPLEX:
+     return NM_COMPLEX128;
+   case T_FLOAT:
+     return NM_FLOAT64;
+ #else
+ # if SIZEOF_FLOAT == 2
+   case T_COMPLEX:
+     return NM_COMPLEX64;
+   case T_FLOAT:
+     return NM_FLOAT32;
+ # endif
+ #endif
+
+   case T_ARRAY: // may be passed for dense -- for now, just look at the first element.
+     return nm_guess_dtype(RARRAY_PTR(v)[0]);
+     // TODO: Look at entire array for most specific type.
+
+   case T_NIL:
+   default:
+     return NM_NONE;
+   }
+ }
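// --- Editor's sketch, not part of the diff: spot-checking nm_guess_dtype(). ---
// Expected results of the switch above on a typical 64-bit build, assuming
// SIZEOF_INT == 8 and SIZEOF_FLOAT == 4:

static void demo_guess_dtype(void) {
  int8_t a = nm_guess_dtype(INT2FIX(1));        // NM_INT64
  int8_t b = nm_guess_dtype(rb_float_new(1.0)); // NM_FLOAT64
  int8_t c = nm_guess_dtype(Qtrue);             // NM_BYTE
  int8_t d = nm_guess_dtype(Qnil);              // NM_NONE
  (void)a; (void)b; (void)c; (void)d;
}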
+
+ // Used for scasting (changing stype)
+ inline void cast_copy_value_single(void* to, const void* from, int8_t l_dtype, int8_t r_dtype) {
+   if (l_dtype == r_dtype) memcpy(to, from, nm_sizeof[l_dtype]);
+   else SetFuncs[l_dtype][r_dtype](1, to, 0, from, 0);
+ }
+
+
+
+ // Read the shape argument to NMatrix.new, which may be either an array or a single number.
+ // Stores the rank through the second argument, and returns the shape array.
+ // You are responsible for freeing shape!
+ size_t* nm_interpret_shape_arg(VALUE arg, size_t* rank) {
+   size_t i;
+   size_t* shape;
+
+   if (TYPE(arg) == T_ARRAY) {
+     *rank = RARRAY_LEN(arg);
+     shape = ALLOC_N(size_t, *rank);
+     for (i = 0; i < *rank; ++i)
+       shape[i] = (size_t)(FIX2UINT(RARRAY_PTR(arg)[i]));
+   } else if (FIXNUM_P(arg)) {
+     *rank = 2;
+     shape = ALLOC_N(size_t, *rank);
+     for (i = 0; i < *rank; ++i)
+       shape[i] = (size_t)(FIX2UINT(arg));
+   } else {
+     *rank = 0;
+     shape = NULL;
+     rb_raise(rb_eArgError, "Expected an array of numbers or a single fixnum for matrix shape");
+   }
+
+   return shape;
+ }
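// --- Editor's sketch, not part of the diff: shape-argument handling. ---
// Per the function above, a bare Fixnum means a square matrix and an Array
// gives one extent per dimension; the caller owns the returned buffer:

static void demo_shape_arg(void) {
  size_t rank;
  size_t* shape = nm_interpret_shape_arg(INT2FIX(3), &rank);
  // rank == 2 and shape[0] == shape[1] == 3 here; [3,4] would give {3, 4}.
  free(shape);  // "You are responsible for freeing shape!"
}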
+
+
+ // argv will be either 1 or 2 elements. If 1, could be either initial or dtype. If 2, is initial and dtype.
+ // This function returns the dtype.
+ int8_t nm_interpret_dtype(int argc, VALUE* argv, int8_t stype) {
+   int offset = 0; // if argc == 1
+   if (argc == 2) offset = 1;
+   else if (argc != 1) rb_raise(rb_eArgError, "Need an initial value or a dtype");
+
+   if (SYMBOL_P(argv[offset])) return nm_dtypesymbol_to_dtype(argv[offset]);
+   else if (IS_STRING(argv[offset])) return nm_dtypestring_to_dtype(StringValue(argv[offset]));
+   else if (stype == S_YALE) rb_raise(rb_eArgError, "yale requires dtype");
+   else return nm_guess_dtype(argv[0]);
+
+   return NM_NONE;
+ }
+
+ int8_t nm_interpret_stype(VALUE arg) {
+   if (SYMBOL_P(arg)) return nm_stypesymbol_to_stype(arg);
+   else if (IS_STRING(arg)) return nm_stypestring_to_stype(StringValue(arg));
+   else rb_raise(rb_eArgError, "Expected storage type");
+   return S_DENSE;
+ }
+
+
+ void* nm_interpret_initial_value(VALUE arg, int8_t dtype) {
+   void* init_val;
+
+   if (TYPE(arg) == T_ARRAY) { // array
+     init_val = ALLOC_N(char, nm_sizeof[dtype] * RARRAY_LEN(arg));
+     SetFuncs[dtype][NM_ROBJ](RARRAY_LEN(arg), init_val, nm_sizeof[dtype], RARRAY_PTR(arg), nm_sizeof[NM_ROBJ]);
+   } else { // single value
+     init_val = ALLOC_N(char, nm_sizeof[dtype]);
+     SetFuncs[dtype][NM_ROBJ](1, init_val, 0, &arg, 0);
+   }
+
+   return init_val;
+ }
+
+
+ size_t* nm_interpret_initial_capacity(VALUE arg) {
+   size_t* init_cap = ALLOC(size_t);
+   *init_cap = FIX2UINT(arg);
+   return init_cap;
+ }
+
+
+ /*
+  * Create a new NMatrix.
+  *
+  * There are several ways to do this. At a minimum, dimensions and either a dtype or initial values are needed, e.g.,
+  *
+  *     NMatrix.new(3, :int64)       # square 3x3 dense matrix
+  *     NMatrix.new([3,4], :float32) # 3x4 matrix
+  *     NMatrix.new(3, 0)            # 3x3 dense matrix initialized to all zeros
+  *     NMatrix.new([3,3], [1,2,3])  # [[1,2,3],[1,2,3],[1,2,3]]
+  *
+  * NMatrix will try to guess the dtype from the first value in the initial values array.
+  *
+  * You can also provide the stype prior to the dimensions. However, non-dense matrices cannot take initial values, and
+  * require a dtype (e.g., :int64):
+  *
+  *     NMatrix.new(:yale, [4,3], :int64)
+  *     NMatrix.new(:list, 5, :rational128)
+  *
+  * Finally, you can be extremely specific, and define a matrix very exactly:
+  *
+  *     NMatrix.new(:dense, [2,2,2], [0,1,2,3,4,5,6,7], :int8)
+  *
+  * Just be careful! There are no overflow warnings in NMatrix.
+  */
+ static VALUE nm_init(int argc, VALUE* argv, VALUE nm) {
+   char  ZERO = 0;
+   VALUE QNIL = Qnil;
+   int8_t dtype, stype, offset = 0;
+   size_t rank;
+   size_t* shape;
+   size_t init_val_len = 0;
+   void* init_val = NULL;
+   NMATRIX* nmatrix;
+
+   // READ ARGUMENTS
+
+   //fprintf(stderr, "Called nmatrix new with %d arguments\n", argc);
+
+   if (argc < 2 || argc > 4) { rb_raise(rb_eArgError, "Expected 2, 3, or 4 arguments"); return Qnil; }
+
+   if (!SYMBOL_P(argv[0]) && !IS_STRING(argv[0])) {
+     stype = S_DENSE;
+   } else {
+     stype  = nm_interpret_stype(argv[0]);                          // 0: String or Symbol (stype)
+     offset = 1;
+   }
+   shape = nm_interpret_shape_arg(argv[offset], &rank);             // offset: Array or Fixnum (shape)
+   dtype = nm_interpret_dtype(argc-1-offset, argv+offset+1, stype); // remaining args: initial value and/or dtype
+
+   if (IS_NUMERIC(argv[1+offset]) || TYPE(argv[1+offset]) == T_ARRAY) { // initial value provided (could also be initial capacity, if yale)
+     if (stype == S_YALE) {
+       init_val     = nm_interpret_initial_capacity(argv[1+offset]);
+       init_val_len = 1;
+     } else {
+       init_val = nm_interpret_initial_value(argv[1+offset], dtype);
+       if (TYPE(argv[1+offset]) == T_ARRAY) init_val_len = RARRAY_LEN(argv[1+offset]);
+       else                                 init_val_len = 1;
+     }
+   } else {
+     if (stype == S_DENSE) { // no need to initialize dense with any kind of default value unless it's an NM_ROBJ matrix
+       if (dtype == NM_ROBJ) { // pretend [nil] was passed for ROBJ.
+         init_val = ALLOC(VALUE);
+         SetFuncs[NM_ROBJ][NM_ROBJ](1, init_val, 0, &QNIL, 0);
+         init_val_len = 1;
+       } else init_val = NULL;
+     } else if (stype == S_YALE) { // if it's a list or compressed, we want to assume default of 0 even if none provided
+       init_val = ALLOC(size_t);
+       *(size_t*)init_val = 0;
+     } else {
+       init_val = ALLOC_N(char, nm_sizeof[dtype]);
+       //memset(init_val, 0, nm_sizeof[dtype]); // TODO: See if this works instead of the next line (with NM_ROBJ matrix). Cleaner.
+       SetFuncs[dtype][NM_BYTE](1, init_val, 0, &ZERO, 0);
+     }
+   }
+
+
+   // TODO: Update to allow an array as the initial value.
+
+   if (dtype == NM_NONE) {
+     // Free before raising; rb_raise does not return.
+     free(init_val);
+     free(shape);
+     rb_raise(rb_eArgError, "Could not recognize dtype");
+     return nm;
+   }
+
+   if (stype < S_TYPES) {
+     UnwrapNMatrix( nm, nmatrix );
+
+     nmatrix->stype   = stype;
+     nmatrix->storage = CreateFuncs[stype](shape, rank, dtype, init_val, init_val_len, nm);
+
+     return nm;
+   } else {
+     // Free before raising; rb_raise does not return.
+     free(shape);
+     free(init_val);
+     rb_raise(rb_eNotImpError, "Unrecognized storage type");
+   }
+
+   return nm;
+ }
+
+
+ static VALUE nm_alloc(VALUE klass) {
+   NMATRIX* mat = ALLOC(NMATRIX);
+   mat->storage = NULL;
+   mat->stype   = S_TYPES;
+   return Data_Wrap_Struct(klass, MarkFuncs[mat->stype], nm_delete, mat);
+ }
+
+
+ // This is the "back-door initializer," for when Ruby needs to create the object in an atypical way.
+ //
+ // Note that objects created this way will have NULL storage.
+ /*static VALUE nm_initialize(VALUE self, VALUE stype, VALUE dtype) {
+   NMATRIX* matrix;
+   UnwrapNMatrix(self, matrix);
+
+   matrix->stype   = nm_interpret_stype(stype);
+   matrix->dtype   = nm_interpret_dtype(1, &dtype, stype);
+   matrix->storage = NULL;
+
+   return self;
+ }*/
+
+
+ static VALUE nm_init_copy(VALUE copy, VALUE original) {
+   NMATRIX *lhs, *rhs;
+
+   CheckNMatrixType(original);
+
+   if (copy == original) return copy;
+
+   UnwrapNMatrix( original, rhs );
+   UnwrapNMatrix( copy,     lhs );
+
+   lhs->stype = rhs->stype;
+
+   // Copy the storage
+   lhs->storage = CastCopyFuncs[rhs->stype](rhs->storage, rhs->storage->dtype);
+
+   return copy;
+ }
+
+
+ static VALUE nm_init_cast_copy(VALUE copy, VALUE original, VALUE new_dtype_symbol) {
+   NMATRIX *lhs, *rhs;
+   int8_t new_dtype = nm_dtypesymbol_to_dtype(new_dtype_symbol);
+   //fprintf(stderr,"In copy constructor\n");
+
+   CheckNMatrixType(original);
+
+   if (copy == original) return copy;
+
+   UnwrapNMatrix( original, rhs );
+   UnwrapNMatrix( copy,     lhs );
+
+   lhs->stype = rhs->stype;
+
+   // Copy the storage
+   lhs->storage = CastCopyFuncs[rhs->stype](rhs->storage, new_dtype);
+
+   return copy;
+ }
+
+
+ /*
+  * Create a copy of an NMatrix with a different dtype. See also cast.
+  */
+ // TODO: Deprecate this function and farm it out to scast_copy. as_dtype will still work, but it'll be in pure Ruby and
+ // just use ::cast instead.
+ static VALUE nm_cast_copy(VALUE self, VALUE new_dtype_symbol) {
+   NMATRIX *original, *copy;
+   int8_t new_dtype = nm_dtypesymbol_to_dtype(new_dtype_symbol);
+
+   CheckNMatrixType(self);
+
+   UnwrapNMatrix(self, original);
+
+   copy = ALLOC(NMATRIX);
+   copy->stype   = original->stype;
+   copy->storage = CastCopyFuncs[original->stype](original->storage, new_dtype);
+
+   return Data_Wrap_Struct(cNMatrix, MarkFuncs[copy->stype], nm_delete, copy);
+ }
+
+
+ /*
+  * Create a copy of an NMatrix with a different stype and dtype. See also cast.
+  *
+  *     m.cast(:dense, :int64)
+  *
+  */
+ static VALUE nm_scast_copy(VALUE self, VALUE new_stype_symbol, VALUE new_dtype_symbol) {
+   NMATRIX* original, *copy;
+   int8_t new_dtype = nm_dtypesymbol_to_dtype(new_dtype_symbol);
+   int8_t new_stype = nm_stypesymbol_to_stype(new_stype_symbol);
+
+   CheckNMatrixType(self);
+
+   UnwrapNMatrix(self, original);
+
+   copy = ALLOC(NMATRIX);
+   copy->stype = new_stype;
+
+   // Copy and scast the storage.
+   if (new_stype == original->stype) copy->storage = CastCopyFuncs[original->stype](original->storage, new_dtype);
+   else copy->storage = ScastCopyFuncs[copy->stype][original->stype](original->storage, new_dtype);
+
+   return Data_Wrap_Struct(cNMatrix, MarkFuncs[copy->stype], nm_delete, copy);
+ }
+
+
+
+ // Cast a single matrix to a new dtype (unless it's already casted, then just return it). Helper for binary_storage_cast_alloc.
+ static inline STORAGE* storage_cast_alloc(NMATRIX* matrix, int8_t new_dtype) {
+   if (matrix->storage->dtype == new_dtype) return matrix->storage;
+   else return CastCopyFuncs[matrix->stype](matrix->storage, new_dtype);
+ }
+
+
+ // Cast a pair of matrices for a binary operation to a new dtype (which this function determines). Technically, only
+ // does an actual cast on matrices that are the wrong dtype; otherwise returns a reference to the original. Bear this in
+ // mind when freeing memory!
+ static inline STORAGE_PAIR binary_storage_cast_alloc(NMATRIX* left_matrix, NMATRIX* right_matrix) {
+   STORAGE_PAIR casted;
+   int8_t new_dtype = Upcast[left_matrix->storage->dtype][right_matrix->storage->dtype];
+
+   casted.left  = storage_cast_alloc(left_matrix, new_dtype);
+   casted.right = storage_cast_alloc(right_matrix, new_dtype);
+
+   return casted;
+ }
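// --- Editor's sketch, not part of the diff: the cast-then-free pattern. ---
// Upcast[l][r] (defined elsewhere in the extension) picks a dtype able to hold
// both operands; binary_storage_cast_alloc() copies only the operand whose
// dtype differs. Callers therefore free exactly the copies they received, as
// nm_eqeq() and multiply_matrix() do below:

static void demo_binary_op(NMATRIX* l, NMATRIX* r) {
  STORAGE_PAIR casted = binary_storage_cast_alloc(l, r);
  // ... operate on casted.left and casted.right ...
  if (l->storage != casted.left)  DeleteFuncs[l->stype](casted.left);
  if (r->storage != casted.right) DeleteFuncs[r->stype](casted.right);
}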
+
+ /*
+  * Equality operator. Returns a single true or false value indicating whether the matrices are equivalent.
+  *
+  * For elementwise, use == instead.
+  *
+  * This method will raise an exception if dimensions do not match.
+  */
+ static VALUE nm_eqeq(VALUE left, VALUE right) {
+   bool result;
+   NMATRIX *l, *r;
+   STORAGE_PAIR casted;
+
+   CheckNMatrixType(left);
+   CheckNMatrixType(right);
+
+   UnwrapNMatrix(left, l);
+   UnwrapNMatrix(right, r);
+
+   if (l->stype != r->stype) //rb_raise(nm_eStorageTypeError, "wrong storage type");
+     rb_raise(rb_eNotImpError, "comparison between different matrix stypes not yet implemented");
+
+   casted = binary_storage_cast_alloc(l, r);
+
+   result = EqEqFuncs[l->stype](casted.left, casted.right);
+
+   // Free any casted-storage we created for the comparison.
+   // TODO: Can we make the Ruby GC take care of this stuff now that we're using it?
+   //       If we did that, we might not have to re-create these every time, right? Or wrong? Need to do
+   //       more research.
+   if (l->storage != casted.left)  DeleteFuncs[l->stype](casted.left);
+   if (r->storage != casted.right) DeleteFuncs[l->stype](casted.right);
+
+   return result ? Qtrue : Qfalse;
+ }
+
+
+ static VALUE multiply_matrix(NMATRIX* left, NMATRIX* right) {
+   ///TODO: multiplication for non-dense and/or non-decimal matrices
+   size_t* resulting_shape = ALLOC_N(size_t, 2);
+   NMATRIX* result;
+   bool vector = false;
+
+   // Make sure both of our matrices are of the correct type.
+   STORAGE_PAIR casted = binary_storage_cast_alloc(left, right);
+
+   resulting_shape[0] = left->storage->shape[0];
+   resulting_shape[1] = right->storage->shape[1];
+
+   // Sometimes we only need to use matrix-vector multiplication (e.g., GEMM versus GEMV). Find out.
+   if (resulting_shape[1] == 1) vector = true;
+
+   result = CastedMultiplyFuncs[left->stype](casted, resulting_shape, vector);
+
+   // Free any casted-storage we created for the multiplication.
+   // TODO: Can we make the Ruby GC take care of this stuff now that we're using it?
+   //       If we did that, we might not have to re-create these every time, right? Or wrong? Need to do
+   //       more research.
+   if (left->storage  != casted.left)  DeleteFuncs[left->stype](casted.left);
+   if (right->storage != casted.right) DeleteFuncs[left->stype](casted.right);
+
+   if (result) return Data_Wrap_Struct(cNMatrix, MarkFuncs[result->stype], nm_delete, result);
+   return Qnil; // Only if we try to multiply list matrices should we return Qnil.
+ }
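// --- Editor's note, not part of the diff: GEMV vs. GEMM selection above. ---
// multiply_matrix() drops to matrix-vector multiply exactly when the result
// has one column, e.g. (3x4)*(4x1) -> {3,1} -> GEMV, (3x4)*(4x2) -> GEMM:

static bool demo_use_gemv(size_t result_cols) {
  return result_cols == 1;  // mirrors the resulting_shape[1] == 1 test above
}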
+
+
+ static VALUE multiply_scalar(NMATRIX* left, VALUE scalar) {
+   rb_raise(rb_eNotImpError, "matrix-scalar multiplication not implemented yet");
+   return Qnil;
+ }
+
+
+ /*
+  * Matrix multiply (dot product): against another matrix or a vector.
+  *
+  * For elementwise, use * instead.
+  *
+  * The two matrices must be of the same stype (for now). If dtype differs, an upcast will occur.
+  */
+ static VALUE nm_multiply(VALUE left_v, VALUE right_v) {
+   NMATRIX *left, *right;
+
+   // left has to be of type NMatrix.
+   CheckNMatrixType(left_v);
+
+   UnwrapNMatrix( left_v, left );
+
+   if (IS_NUMERIC(right_v))
+     return multiply_scalar(left, right_v);
+
+   else if (TYPE(right_v) == T_ARRAY)
+     rb_raise(rb_eNotImpError, "for matrix-vector multiplication, please use an NVector instead of an Array for now");
+
+   //if (RDATA(right_v)->dfree != (RUBY_DATA_FUNC)nm_delete) {
+   else if (TYPE(right_v) == T_DATA && RDATA(right_v)->dfree == (RUBY_DATA_FUNC)nm_delete) { // both are matrices
+     UnwrapNMatrix( right_v, right );
+
+     if (left->storage->shape[1] != right->storage->shape[0])
+       rb_raise(rb_eArgError, "incompatible dimensions");
+
+     if (left->stype != right->stype)
+       rb_raise(rb_eNotImpError, "matrices must have same stype");
+
+     return multiply_matrix(left, right);
+
+   } else rb_raise(rb_eTypeError, "expected right operand to be NMatrix, NVector, or single numeric value");
+
+   return Qnil;
+ }
+
+
+ static VALUE nm_elementwise(VALUE leftv, VALUE rightv, char op) {
+   ///TODO: elementwise operations for non-dense and/or non-decimal matrices
+   NMATRIX *result, *left, *right;
+   STORAGE_PAIR casted;
+
+   CheckNMatrixType(leftv);
+   CheckNMatrixType(rightv);
+
+   UnwrapNMatrix(rightv, right);
+   UnwrapNMatrix(leftv, left);
+
+   // Make sure both of our matrices are of the correct type.
+   casted = binary_storage_cast_alloc(left, right);
+
+   result = CastedElementwiseFuncs[left->stype](casted, op);
+
+   // Free up temporary casted matrices
+   if (left->storage  != casted.left)  DeleteFuncs[left->stype](casted.left);
+   if (right->storage != casted.right) DeleteFuncs[left->stype](casted.right);
+
+   if (result) return Data_Wrap_Struct(cNMatrix, MarkFuncs[result->stype], nm_delete, result);
+   return Qnil; // Only if we try to operate on list matrices should we return Qnil.
+ }
+
+
+ /*
+  * Matrix element-wise addition.
+  *
+  * The two matrices must be of the same stype (for now). If dtype differs, an upcast will occur.
+  *
+  * Not available for list matrices. You should cast to a yale or dense matrix first.
+  */
+ static VALUE nm_ew_add(VALUE left, VALUE right) {
+   return nm_elementwise(left, right, '+');
+ }
+
+ /*
+  * Matrix element-wise subtraction.
+  *
+  * The two matrices must be of the same stype (for now). If dtype differs, an upcast will occur.
+  *
+  * Not available for list matrices. You should cast to a yale or dense matrix first.
+  */
+ static VALUE nm_ew_subtract(VALUE left, VALUE right) {
+   return nm_elementwise(left, right, '-');
+ }
+
+ /*
+  * Matrix element-wise multiplication.
+  *
+  * The two matrices must be of the same stype (for now). If dtype differs, an upcast will occur.
+  *
+  * Not available for list matrices. You should cast to a yale or dense matrix first.
+  *
+  * For dot product, use +dot+ instead.
+  */
+ static VALUE nm_ew_multiply(VALUE left, VALUE right) {
+   return nm_elementwise(left, right, '*');
+ }
+
+ /*
+  * Matrix element-wise division.
+  *
+  * The two matrices must be of the same stype (for now). If dtype differs, an upcast will occur.
+  *
+  * Not available for list matrices. You should cast to a yale or dense matrix first.
+  */
+ static VALUE nm_ew_divide(VALUE left, VALUE right) {
+   return nm_elementwise(left, right, '/');
+ }
+
+
+ /*
+  * Matrix element-wise comparison (equality) operator.
+  *
+  * The two matrices must be of the same stype (for now). If dtype differs, an upcast will occur.
+  *
+  * Not available for list matrices. You should cast to a yale or dense matrix first.
+  *
+  * Note that the matrix returned will be of the same dtype as the upcast of the input matrices. If that's not what you
+  * want, use +cast+.
+  */
+ static VALUE nm_ew_eqeq(VALUE left, VALUE right) {
+   return nm_elementwise(left, right, NM_OP_EQEQ);
+ }
+
+ /*
+  * Matrix element-wise less-than-or-equals operator.
+  *
+  * The two matrices must be of the same stype (for now). If dtype differs, an upcast will occur.
+  *
+  * Not available for list matrices. You should cast to a yale or dense matrix first.
+  *
+  * Note that the matrix returned will be of the same dtype as the upcast of the input matrices. If that's not what you
+  * want, use +cast+.
+  */
+ static VALUE nm_ew_leq(VALUE left, VALUE right) {
+   return nm_elementwise(left, right, NM_OP_LTE);
+ }
+
+
+ /*
+  * Matrix element-wise greater-than-or-equals operator.
+  *
+  * The two matrices must be of the same stype (for now). If dtype differs, an upcast will occur.
+  *
+  * Not available for list matrices. You should cast to a yale or dense matrix first.
+  *
+  * Note that the matrix returned will be of the same dtype as the upcast of the input matrices. If that's not what you
+  * want, use +cast+.
+  */
+ static VALUE nm_ew_geq(VALUE left, VALUE right) {
+   return nm_elementwise(left, right, NM_OP_GTE);
+ }
+
+
+ /*
+  * Matrix element-wise strictly-less-than operator.
+  *
+  * The two matrices must be of the same stype (for now). If dtype differs, an upcast will occur.
+  *
+  * Not available for list matrices. You should cast to a yale or dense matrix first.
+  *
+  * Note that the matrix returned will be of the same dtype as the upcast of the input matrices. If that's not what you
+  * want, use +cast+.
+  */
+ static VALUE nm_ew_lt(VALUE left, VALUE right) {
+   return nm_elementwise(left, right, '<');
+ }
+
+
+ /*
+  * Matrix element-wise strictly-greater-than operator.
+  *
+  * The two matrices must be of the same stype (for now). If dtype differs, an upcast will occur.
+  *
+  * Not available for list matrices. You should cast to a yale or dense matrix first.
+  *
+  * Note that the matrix returned will be of the same dtype as the upcast of the input matrices. If that's not what you
+  * want, use +cast+.
+  */
+ static VALUE nm_ew_gt(VALUE left, VALUE right) {
+   return nm_elementwise(left, right, '>');
+ }
+
+
+ /*
+  * Matrix element-wise inequality operator.
+  *
+  * The two matrices must be of the same stype (for now). If dtype differs, an upcast will occur.
+  *
+  * Not available for list matrices. You should cast to a yale or dense matrix first.
+  *
+  * Note that the matrix returned will be of the same dtype as the upcast of the input matrices. If that's not what you
+  * want, use +cast+.
+  */
+ static VALUE nm_ew_neq(VALUE left, VALUE right) {
+   return nm_elementwise(left, right, NM_OP_NEQ);
+ }
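// --- Editor's sketch, not part of the diff: the wrapper pattern above. ---
// Each nm_ew_* wrapper only tags the operation with a character or NM_OP_*
// constant; nm_elementwise() does the upcast and stype dispatch once. A new
// operator would follow the same shape (the '%' op is hypothetical and not
// supported by this version):

static VALUE demo_ew_mod(VALUE left, VALUE right) {
  return nm_elementwise(left, right, '%');
}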
+
+
+ // Borrowed this function from NArray. Handles 'each' iteration on a dense matrix.
+ //
+ // Additionally, handles separately matrices containing VALUEs and matrices containing
+ // other types of data.
+ static VALUE nm_dense_each(VALUE nmatrix) {
+   DENSE_STORAGE* s = (DENSE_STORAGE*)(NM_STORAGE(nmatrix));
+   VALUE v;
+   size_t i;
+
+   void (*copy)();
+
+   if (NM_DTYPE(nmatrix) == NM_ROBJ) {
+
+     // matrix of Ruby objects -- yield directly
+     for (i = 0; i < count_dense_storage_elements(s); ++i)
+       rb_yield( *((VALUE*)((char*)(s->elements) + i*nm_sizeof[NM_DTYPE(nmatrix)])) );
+
+   } else {
+     // We're going to copy the matrix element into a Ruby VALUE and then operate on it.
+     copy = SetFuncs[NM_ROBJ][NM_DTYPE(nmatrix)];
+
+     for (i = 0; i < count_dense_storage_elements(s); ++i) {
+       (*copy)(1, &v, 0, (char*)(s->elements) + i*nm_sizeof[NM_DTYPE(nmatrix)], 0);
+       rb_yield(v); // yield to the copy we made
+     }
+   }
+
+   return nmatrix;
+ }
+
+
+ /*
+  * Iterate over the matrix as you would an Enumerable (e.g., Array).
+  *
+  * Currently only works for dense.
+  */
+ static VALUE nm_each(VALUE nmatrix) {
+   volatile VALUE nm = nmatrix; // not sure why we do this, but it gets done in ruby's array.c.
+
+   switch(NM_STYPE(nm)) {
+   case S_DENSE:
+     return nm_dense_each(nm);
+   default:
+     rb_raise(rb_eNotImpError, "only dense each works right now");
+   }
+ }
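// --- Editor's note, not part of the diff: Ruby-side iteration, in the style
// of the doc comments above and assuming the function is bound as
// NMatrix#each (dense only for now):
//
//     m = NMatrix.new(:dense, 2, [1, 2, 3, 4], :int64)
//     m.each { |e| puts e }   # yields 1, 2, 3, 4 in storage order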
+
+
+ // Does not create storage, but does destroy it.
+ NMATRIX* nm_create(int8_t stype, void* storage) {
+   NMATRIX* mat = ALLOC(NMATRIX);
+
+   mat->stype   = stype;
+   mat->storage = storage;
+
+   return mat;
+ }
+
+
+ static size_t* convert_coords(size_t rank, VALUE* c, VALUE self) {
+   size_t r;
+   size_t* coords = ALLOC_N(size_t, rank);
+
+   for (r = 0; r < rank; ++r) {
+     coords[r] = FIX2UINT(c[r]);
+     if (coords[r] >= NM_SHAPE(self,r)) rb_raise(rb_eArgError, "out of range");
+   }
+
+   return coords;
+ }
+
+
+ /*
+  * Access the contents of an NMatrix at given coordinates.
+  *
+  *     n[3,3] # => 5.0
+  *
+  */
+ VALUE nm_mref(int argc, VALUE* argv, VALUE self) {
+   VALUE v;
+
+   if (NM_RANK(self) == (size_t)(argc)) {
+
+     SetFuncs[NM_ROBJ][NM_DTYPE(self)](1, &v, 0,
+                                       RefFuncs[NM_STYPE(self)](NM_STORAGE(self),
+                                                                convert_coords((size_t)(argc), argv, self)
+                                                               ), 0);
+     return v;
+
+   } else if (NM_RANK(self) < (size_t)(argc)) {
+     rb_raise(rb_eArgError, "Coordinates given exceed matrix rank");
+   } else {
+     rb_raise(rb_eNotImpError, "Slicing not supported yet");
+   }
+   return Qnil;
+ }
+
+
+ /*
+  * Modify the contents of an NMatrix in the given cell
+  *
+  *     n[3,3] = 5.0
+  *
+  * Also returns the new contents, so you can chain:
+  *
+  *     n[3,3] = n[2,3] = 5.0
+  */
+ VALUE nm_mset(int argc, VALUE* argv, VALUE self) {
+   size_t rank = argc - 1; // last arg is the value
+
+   if (argc <= 1) {
+     rb_raise(rb_eArgError, "Expected coordinates and r-value");
+
+   } else if (NM_RANK(self) == rank) {
+     return (*(InsFuncs[NM_STYPE(self)]))( NM_STORAGE(self),
+                                           convert_coords(rank, argv, self),
+                                           argv[rank] );
+
+   } else if (NM_RANK(self) < rank) {
+     rb_raise(rb_eArgError, "Coordinates given exceed matrix rank");
+   } else {
+     rb_raise(rb_eNotImpError, "Slicing not supported yet");
+   }
+   return Qnil;
+ }
+
+
+ /*
+  * Get the rank of an NMatrix (the number of dimensions).
+  *
+  * In other words, if you set your matrix to be 3x4, the rank is 2. If the matrix was initialized as 3x4x3, the rank
+  * is 3.
+  *
+  * This function may lie slightly for NVectors, which are internally stored as rank 2 (and have an orientation), but
+  * act as if they're rank 1.
+  */
+ VALUE nm_rank(VALUE self) {
+   VALUE ret;
+   SetFuncs[NM_ROBJ][NM_INT64]( 1, &ret, 0, &(NM_STORAGE(self)->rank), 0 );
+   return ret;
+ }
+
+
+ /*
+  * Get the shape (dimensions) of a matrix.
+  */
+ VALUE nm_shape(VALUE self) {
+   STORAGE* s = NM_STORAGE(self);
+
+   // Copy elements into a VALUE array and then use those to create a Ruby array with rb_ary_new4.
+   VALUE* shape = ALLOCA_N(VALUE, s->rank);
+   SetFuncs[NM_ROBJ][NM_SIZE_T]( s->rank, shape, sizeof(VALUE), s->shape, sizeof(size_t));
+
+   return rb_ary_new4(s->rank, shape);
+ }
+
+
+ /*
+  * Get the storage type (stype) of a matrix, e.g., :yale, :dense, or :list.
+  */
+ static VALUE nm_stype(VALUE self) {
+   ID stype = rb_intern(nm_stypestring[NM_STYPE(self)]);
+   return ID2SYM(stype);
+ }
+
+
+ /*
+  * Get the data type (dtype) of a matrix, e.g., :byte, :int8, :int16, :int32, :int64, :float32, :float64, :complex64,
+  * :complex128, :rational32, :rational64, :rational128, or :object (the last is a Ruby object).
+  */
+ static VALUE nm_dtype(VALUE self) {
+   ID dtype = rb_intern(nm_dtypestring[NM_DTYPE(self)]);
+   return ID2SYM(dtype);
+ }
+
+
+ /* Interprets cblas argument which could be any of false/:no_transpose, :transpose, or :complex_conjugate,
+  * into an enum recognized by cblas.
+  *
+  * Called by nm_cblas_gemm -- basically inline.
+  *
+  */
+ static char gemm_op_sym(VALUE op) {
+   if (op == false || rb_to_id(op) == nm_id_no_transpose) return CblasNoTrans;
+   else if (rb_to_id(op) == nm_id_transpose) return CblasTrans;
+   else if (rb_to_id(op) == nm_id_complex_conjugate) return CblasConjTrans;
+   else rb_raise(rb_eArgError, "Expected false, :transpose, or :complex_conjugate");
+   return CblasNoTrans;
+ }
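// --- Editor's sketch, not part of the diff: gemm_op_sym() mapping. ---
// (Qfalse compares equal to false in MRI's C API, hence the first branch.)
static void demo_op_sym(void) {
  char no_t = gemm_op_sym(Qfalse);                                 // CblasNoTrans
  char tr   = gemm_op_sym(ID2SYM(rb_intern("transpose")));         // CblasTrans
  char conj = gemm_op_sym(ID2SYM(rb_intern("complex_conjugate"))); // CblasConjTrans
  (void)no_t; (void)tr; (void)conj;
}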
+
+
+ /* Call any of the cblas_xgemm functions as directly as possible.
+  *
+  * The cblas_xgemm functions (dgemm, sgemm, cgemm, and zgemm) define the following operation:
+  *
+  *     C = alpha*op(A)*op(B) + beta*C
+  *
+  * where op(X) is one of <tt>op(X) = X</tt>, <tt>op(X) = X**T</tt>, or the complex conjugate of X.
+  *
+  * Note that this will only work for dense matrices that are of types :float32, :float64, :complex64, and :complex128.
+  * Other types are not implemented in BLAS, and while they exist in NMatrix, this method is intended only to
+  * expose the ultra-optimized ATLAS versions.
+  *
+  * == Arguments
+  * See: http://www.netlib.org/blas/dgemm.f
+  *
+  * You probably don't want to call this function. Instead, why don't you try cblas_gemm, which is more flexible
+  * with its arguments?
+  *
+  * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
+  * handling, so you can easily crash Ruby!
+  */
+ static VALUE nm_cblas_gemm(VALUE self,
+                            VALUE trans_a, VALUE trans_b,
+                            VALUE m, VALUE n, VALUE k,
+                            VALUE alpha,
+                            VALUE a, VALUE lda,
+                            VALUE b, VALUE ldb,
+                            VALUE beta,
+                            VALUE c, VALUE ldc)
+ {
+   struct cblas_param_t p = cblas_params_for_multiply(((DENSE_STORAGE*)(NM_STORAGE(a))), ((DENSE_STORAGE*)(NM_STORAGE(b))), ((DENSE_STORAGE*)(NM_STORAGE(c))), false);
+   p.M = FIX2INT(m);
+   p.N = FIX2INT(n);
+   p.K = FIX2INT(k);
+
+   p.lda = FIX2INT(lda);
+   p.ldb = FIX2INT(ldb);
+   p.ldc = FIX2INT(ldc);
+
+   switch(NM_DTYPE(c)) {
+   case NM_FLOAT32:
+   case NM_FLOAT64:
+     p.alpha.d[0] = NUM2DBL(alpha);
+     p.beta.d[0]  = NUM2DBL(beta);
+     break;
+
+   case NM_COMPLEX64:
+     p.alpha.c[0].r = REAL2DBL(alpha);
+     p.alpha.c[0].i = IMAG2DBL(alpha);
+     p.beta.c[0].r  = REAL2DBL(beta);
+     p.beta.c[0].i  = IMAG2DBL(beta);
+     break;
+
+   case NM_COMPLEX128:
+     p.alpha.z.r = REAL2DBL(alpha);
+     p.alpha.z.i = IMAG2DBL(alpha);
+     p.beta.z.r  = REAL2DBL(beta);
+     p.beta.z.i  = IMAG2DBL(beta);
+     break;
+
+   case NM_BYTE:
+     p.alpha.b[0] = FIX2INT(alpha);
+     p.beta.b[0]  = FIX2INT(beta);
+     break;
+
+   case NM_INT8:
+   case NM_INT16:
+   case NM_INT32:
+   case NM_INT64:
+     p.alpha.i[0] = FIX2INT(alpha);
+     p.beta.i[0]  = FIX2INT(beta);
+     break;
+
+   case NM_RATIONAL32:
+     p.alpha.r[0].n = NUMER2INT(alpha);
+     p.alpha.r[0].d = DENOM2INT(alpha);
+     p.beta.r[0].n  = NUMER2INT(beta);
+     p.beta.r[0].d  = DENOM2INT(beta);
+     break;
+
+   case NM_RATIONAL64:
+     p.alpha.ra[0].n = NUMER2INT(alpha);
+     p.alpha.ra[0].d = DENOM2INT(alpha);
+     p.beta.ra[0].n  = NUMER2INT(beta);
+     p.beta.ra[0].d  = DENOM2INT(beta);
+     break;
+
+   case NM_RATIONAL128:
+     p.alpha.rat.n = NUMER2INT(alpha);
+     p.alpha.rat.d = DENOM2INT(alpha);
+     p.beta.rat.n  = NUMER2INT(beta);
+     p.beta.rat.d  = DENOM2INT(beta);
+     break;
+
+   case NM_ROBJ:
+     p.alpha.v[0] = alpha;
+     p.beta.v[0]  = beta;
+     break;
+
+   default:
+     rb_raise(nm_eDataTypeError, "unexpected dtype");
+
+   }
+
+   /* fprintf(stderr, "cblas_gemm: %d %d %d %d %d %f %d %d %f %d\n", trans_a_, trans_b_,
+      m_, n_, k_, alpha_, lda_, ldb_, beta_, ldc_); */
+
+   GemmFuncs[NM_DTYPE(c)](CblasRowMajor, gemm_op_sym(trans_a), gemm_op_sym(trans_b), p);
+
+   return Qtrue;
+ }
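// --- Editor's worked example, not part of the diff. ---
// With alpha = 1 and beta = 0, C = alpha*op(A)*op(B) + beta*C reduces to a
// plain product. For trans_a = trans_b = false, M = N = K = 2, and
// lda = ldb = ldc = 2:
//   A = [[1, 2],    B = [[5, 6],
//        [3, 4]]         [7, 8]]
//   C = [[1*5 + 2*7, 1*6 + 2*8],    [[19, 22],
//        [3*5 + 4*7, 3*6 + 4*8]] ==  [43, 50]]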
1531
+
1532
+
1533
+ /* Call any of the cblas_xgemv functions as directly as possible.
1534
+ *
1535
+ * The cblas_xgemv functions (dgemv, sgemv, cgemv, and zgemv) define the following operation:
1536
+ *
1537
+ * y = alpha*op(A)*x + beta*y
1538
+ *
1539
+ * where op(A) is one of <tt>op(A) = A</tt>, <tt>op(A) = A**T</tt>, or the complex conjugate of A.
1540
+ *
1541
+ * Note that this will only work for dense matrices that are of types :float32, :float64, :complex64, and :complex128.
1542
+ * Other types are not implemented in BLAS, and while they exist in NMatrix, this method is intended only to
1543
+ * expose the ultra-optimized ATLAS versions.
1544
+ *
1545
+ * == Arguments
1546
+ * See: http://www.netlib.org/blas/dgemm.f
1547
+ *
1548
+ * You probably don't want to call this function. Instead, why don't you try cblas_gemv, which is more flexible
1549
+ * with its arguments?
1550
+ *
1551
+ * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
1552
+ * handling, so you can easily crash Ruby!
1553
+ */
1554
+ static VALUE nm_cblas_gemv(VALUE self,
1555
+ VALUE trans_a,
1556
+ VALUE m, VALUE n,
1557
+ VALUE alpha,
1558
+ VALUE a, VALUE lda,
1559
+ VALUE x, VALUE incx,
1560
+ VALUE beta,
1561
+ VALUE y, VALUE incy) {
1562
+
1563
+ struct cblas_param_t p;
1564
+ p.M = FIX2INT(m);
1565
+ p.N = FIX2INT(n);
1566
+ p.A = ((DENSE_STORAGE*)(NM_STORAGE(a)))->elements;
1567
+ p.B = ((DENSE_STORAGE*)(NM_STORAGE(x)))->elements;
1568
+ p.C = ((DENSE_STORAGE*)(NM_STORAGE(y)))->elements;
1569
+ p.lda = FIX2INT(lda);
1570
+ p.ldb = FIX2INT(incx);
1571
+ p.ldc = FIX2INT(incy);
1572
+
1573
+ switch(NM_DTYPE(y)) {
1574
+ case NM_FLOAT32:
1575
+ case NM_FLOAT64:
1576
+ p.alpha.d[0] = REAL2DBL(alpha);
1577
+ p.beta.d[0] = REAL2DBL(beta);
1578
+ break;
1579
+ case NM_COMPLEX64:
1580
+ p.alpha.c[0].r = REAL2DBL(alpha);
1581
+ p.alpha.c[0].i = IMAG2DBL(alpha);
1582
+ p.beta.c[0].r = REAL2DBL(beta);
1583
+ p.beta.c[0].i = IMAG2DBL(beta);
1584
+ break;
1585
+ case NM_COMPLEX128:
1586
+ p.alpha.z.r = REAL2DBL(alpha);
1587
+ p.alpha.z.i = IMAG2DBL(alpha);
1588
+ p.beta.z.r = REAL2DBL(beta);
1589
+ p.beta.z.i = IMAG2DBL(beta);
1590
+ break;
1591
+ default:
+ rb_raise(nm_eDataTypeError, "cblas_gemv requires dtype :float32, :float64, :complex64, or :complex128");
+ }
1592
+
1593
+ /* fprintf(stderr, "cblas_gemm: %d %d %d %d %d %f %d %d %f %d\n", trans_a_, trans_b_,
1594
+ m_, n_, alpha_, lda_, incx_, beta_, incy_); */
1595
+
1596
+ GemvFuncs[NM_DTYPE(y)](CblasRowMajor, gemm_op_sym(trans_a), p);
1597
+
1598
+ return Qtrue;
1599
+ }
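+
+ /* Illustrative usage sketch (assumptions only): for a 3x3 :float64 dense
+ * matrix a and length-3 dense vectors x and y of the same dtype:
+ *
+ *   NMatrix.__cblas_gemv__(:no_transpose,
+ *                          3, 3,   # m, n
+ *                          1.0,    # alpha
+ *                          a, 3,   # a, lda
+ *                          x, 1,   # x, incx
+ *                          0.0,    # beta
+ *                          y, 1)   # y, incy
+ *
+ * computes y = 1.0 * a * x + 0.0 * y, writing the result into y's storage.
+ */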
1600
+
1601
+
1602
+ /*
1603
+ * Find the capacity of an NMatrix. The capacity only differs from the size for Yale matrices, which occasionally
1604
+ * allocate more space than they need. For dense matrices, capacity is the total number of elements; for list matrices, it is the number of elements actually stored.
1605
+ */
1606
+ static VALUE nm_capacity(VALUE self) {
1607
+ VALUE cap;
1608
+
1609
+ switch(NM_STYPE(self)) {
1610
+ case S_YALE:
1611
+ cap = UINT2NUM(((YALE_STORAGE*)(NM_STORAGE(self)))->capacity);
1612
+ break;
1613
+
1614
+ case S_DENSE:
1615
+ cap = UINT2NUM(count_dense_storage_elements( (DENSE_STORAGE*)(NM_STORAGE(self)) ));
1616
+ break;
1617
+
1618
+ case S_LIST:
1619
+ cap = UINT2NUM(count_list_storage_elements( (LIST_STORAGE*)(NM_STORAGE(self)) ));
1620
+ break;
1621
+
1622
+ default:
1623
+ //rb_raise(rb_eNotImpError, "TODO: implement capacity/size on other storage types");
1624
+ rb_raise(nm_eStorageTypeError, "unrecognized stype");
1625
+ }
1626
+
1627
+ return cap;
1628
+ }
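+
+ /* Hypothetical session illustrating the distinction (not real output): a 2x2
+ * dense matrix always reports capacity 4, while a Yale matrix may report more
+ * slots than it currently uses:
+ *
+ *   m.capacity                     # => 4 for a 2x2 dense matrix
+ *   y.capacity >= y.__yale_size__  # always true for a Yale matrix y
+ */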
1629
+
1630
+
1631
+ /*
1632
+ * Get the size of a Yale matrix (the number of elements actually stored).
1633
+ *
1634
+ * For capacity (the maximum number of elements that can be stored without a resize), use capacity instead.
1635
+ */
1636
+ static VALUE nm_yale_size(VALUE self) {
1637
+ VALUE sz;
1638
+ YALE_STORAGE* s = (YALE_STORAGE*)NM_STORAGE(self);
1639
+
1640
+ if (NM_STYPE(self) != S_YALE) rb_raise(nm_eStorageTypeError, "wrong storage type");
1641
+
1642
+ SetFuncs[NM_ROBJ][s->index_dtype](1, &sz, 0, (YALE_SIZE_PTR((s), nm_sizeof[s->index_dtype])), 0);
1643
+ return sz;
1644
+ }
1645
+
1646
+
1647
+ /*
1648
+ * Get the A array of a Yale matrix (which stores the diagonal and the LU portions of the matrix).
1649
+ */
1650
+ static VALUE nm_yale_a(VALUE self) {
1651
+ y_size_t sz, i;
1652
+ void* vals;
1653
+ VALUE ary;
1654
+ YALE_STORAGE* s = (YALE_STORAGE*)NM_STORAGE(self);
1655
+
1656
+ if (NM_STYPE(self) != S_YALE) rb_raise(nm_eStorageTypeError, "wrong storage type");
1657
+
1658
+ YaleGetSize(sz, s);
1659
+ vals = ALLOC_N(char, nm_sizeof[NM_ROBJ]*sz);
1660
+
1661
+ SetFuncs[NM_ROBJ][s->dtype](sz, vals, nm_sizeof[NM_ROBJ], s->a, nm_sizeof[s->dtype]);
1662
+ ary = rb_ary_new4(sz, vals);
1663
+
1664
+ for (i = sz; i < s->capacity; ++i)
1665
+ rb_ary_push(ary, Qnil);
1666
+
1667
+ return ary;
1668
+ }
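+
+ /* Worked example of the old-Yale layout read by this accessor and the ones
+ * below (values assumed for illustration). For the 3x3 matrix
+ *
+ *   [[1, 0, 2],
+ *    [0, 3, 0],
+ *    [0, 4, 5]]
+ *
+ * the stored size is 6 and the two parallel arrays are:
+ *
+ *   IJA = [4, 5, 5, 6,  2, 1]   // IA (row pointers), then JA (column indices)
+ *   A   = [1, 3, 5, _,  2, 4]   // D (diagonal), one unused slot, then LU
+ *
+ * IA[i]..IA[i+1] delimit row i's off-diagonal entries in JA and LU, and slot
+ * shape[0] of A is never read. The a/lu/ja/ija accessors pad the tail between
+ * size and capacity with nil.
+ */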
1669
+
1670
+
1671
+ /*
1672
+ * Get the diagonal ("D") portion of the A array of a Yale matrix.
1673
+ */
1674
+ static VALUE nm_yale_d(VALUE self) {
1675
+ y_size_t sz;
1676
+ void* vals;
1677
+ VALUE ary;
1678
+ YALE_STORAGE* s = (YALE_STORAGE*)NM_STORAGE(self);
1679
+
1680
+ if (NM_STYPE(self) != S_YALE) rb_raise(nm_eStorageTypeError, "wrong storage type");
1681
+
1682
+ YaleGetSize(sz, s);
1683
+ vals = ALLOC_N(char, nm_sizeof[NM_ROBJ]*s->shape[0]);
1684
+
1685
+ SetFuncs[NM_ROBJ][s->dtype](s->shape[0], vals, nm_sizeof[NM_ROBJ], s->a, nm_sizeof[s->dtype]);
1686
+ ary = rb_ary_new4(s->shape[0], vals);
1687
+
1688
+ return ary;
1689
+ }
1690
+
1691
+
1692
+ /*
1693
+ * Get the non-diagonal ("LU") portion of the A array of a Yale matrix.
1694
+ */
1695
+ static VALUE nm_yale_lu(VALUE self) {
1696
+ y_size_t sz, i;
1697
+ void* vals;
1698
+ VALUE ary;
1699
+ YALE_STORAGE* s = (YALE_STORAGE*)NM_STORAGE(self);
1700
+
1701
+ if (NM_STYPE(self) != S_YALE) rb_raise(nm_eStorageTypeError, "wrong storage type");
1702
+
1703
+ YaleGetSize(sz, s);
1704
+ vals = ALLOC_N(char, nm_sizeof[NM_ROBJ]*(s->capacity - s->shape[0]));
1705
+
1706
+ SetFuncs[NM_ROBJ][s->dtype](sz - s->shape[0] - 1, vals, nm_sizeof[NM_ROBJ], (char*)(s->a) + (s->shape[0] + 1)*nm_sizeof[s->dtype], nm_sizeof[s->dtype]);
1707
+ ary = rb_ary_new4(sz - s->shape[0] - 1, vals);
1708
+
1709
+ for (i = sz; i < s->capacity; ++i)
1710
+ rb_ary_push(ary, Qnil);
1711
+
1712
+ return ary;
1713
+ }
1714
+
1715
+
1716
+ /*
1717
+ * Get the IA portion of the IJA array of a Yale matrix. This gives the start and end positions of rows in the
1718
+ * JA and LU portions of the IJA and A arrays, respectively.
1719
+ */
1720
+ static VALUE nm_yale_ia(VALUE self) {
1721
+ y_size_t sz;
1722
+ void* vals;
1723
+ VALUE ary;
1724
+ YALE_STORAGE* s = (YALE_STORAGE*)NM_STORAGE(self);
1725
+
1726
+ if (NM_STYPE(self) != S_YALE) rb_raise(nm_eStorageTypeError, "wrong storage type");
1727
+
1728
+ YaleGetSize(sz, s);
1729
+ vals = ALLOC_N(char, nm_sizeof[NM_ROBJ]*(s->shape[0]+1));
1730
+
1731
+ SetFuncs[NM_ROBJ][s->index_dtype](s->shape[0]+1, vals, nm_sizeof[NM_ROBJ], s->ija, nm_sizeof[s->index_dtype]);
1732
+ ary = rb_ary_new4(s->shape[0]+1, vals);
1733
+
1734
+ return ary;
1735
+ }
1736
+
1737
+
1738
+ /*
1739
+ * Get the JA portion of the IJA array of a Yale matrix. This gives the column indices for entries in corresponding
1740
+ * positions in the LU portion of the A array.
1741
+ */
1742
+ static VALUE nm_yale_ja(VALUE self) {
1743
+ y_size_t sz, i;
1744
+ void* vals;
1745
+ VALUE ary;
1746
+ YALE_STORAGE* s = (YALE_STORAGE*)NM_STORAGE(self);
1747
+
1748
+ if (NM_STYPE(self) != S_YALE) rb_raise(nm_eStorageTypeError, "wrong storage type");
1749
+
1750
+ YaleGetSize(sz, s);
1751
+ vals = ALLOC_N(char, nm_sizeof[NM_ROBJ]*(s->capacity - s->shape[0]));
1752
+
1753
+ SetFuncs[NM_ROBJ][s->index_dtype](sz - s->shape[0] - 1, vals, nm_sizeof[NM_ROBJ], (char*)(s->ija) + (s->shape[0] + 1)*nm_sizeof[s->index_dtype], nm_sizeof[s->index_dtype]);
1754
+ ary = rb_ary_new4(sz - s->shape[0] - 1, vals);
1755
+
1756
+ for (i = sz; i < s->capacity; ++i)
1757
+ rb_ary_push(ary, Qnil);
1758
+
1759
+ return ary;
1760
+ }
1761
+
1762
+
1763
+ /*
1764
+ * Get the IJA array of a Yale matrix.
1765
+ */
1766
+ static VALUE nm_yale_ija(VALUE self) {
1767
+ y_size_t sz, i;
1768
+ void* vals;
1769
+ VALUE ary;
1770
+ YALE_STORAGE* s = (YALE_STORAGE*)NM_STORAGE(self);
1771
+
1772
+ if (NM_STYPE(self) != S_YALE) rb_raise(nm_eStorageTypeError, "wrong storage type");
1773
+
1774
+ YaleGetSize(sz, s);
1775
+ vals = ALLOC_N(char, nm_sizeof[NM_ROBJ]*s->capacity);
1776
+
1777
+ SetFuncs[NM_ROBJ][s->index_dtype](sz, vals, nm_sizeof[NM_ROBJ], s->ija, nm_sizeof[s->index_dtype]);
1778
+ ary = rb_ary_new4(sz, vals);
1779
+
1780
+ for (i = sz; i < s->capacity; ++i)
1781
+ rb_ary_push(ary, Qnil);
1782
+
1783
+ return ary;
1784
+ }
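+
+ /* Continuing the worked example above (hypothetical session, assuming no
+ * spare capacity beyond the 6 stored entries):
+ *
+ *   m.__yale_size__  # => 6
+ *   m.__yale_ia__    # => [4, 5, 5, 6]
+ *   m.__yale_ja__    # => [2, 1]
+ *   m.__yale_d__     # => [1, 3, 5]
+ *   m.__yale_lu__    # => [2, 4]
+ *   m.__yale_a__     # => [1, 3, 5, <unused slot>, 2, 4]
+ *   m.__yale_ija__   # => [4, 5, 5, 6, 2, 1]
+ */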
1785
+
1786
+
1787
+ // This is probably faster and smaller than writing an array of transpose functions. But if you want to see what it would look like,
1788
+ // see transp.template.c (not the yale one).
1789
+ //
1790
+ // Note that this is a copy-transpose. In-place transpose is a whole different operation and bag of worms.
1791
+ static void dense_transpose_generic(const unsigned int M, const unsigned int N, const char* A, const int lda, char* B, const int ldb, size_t dtype_size) {
1792
+ unsigned int i, j;
1793
+
1794
+ for (i = 0; i < N; ++i) {
1795
+ for (j = 0; j < M; ++j) {
1796
+ memcpy(B + (i*ldb+j)*dtype_size, A + (j*lda+i)*dtype_size, dtype_size);
1797
+ }
1798
+ }
1799
+ }
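+
+ /* Usage sketch for the helper above (a hypothetical caller, traced against
+ * the loop indices): transposing a 2x3 row-major double array into 3x2.
+ *
+ *   double a[6] = { 1, 2, 3,
+ *                   4, 5, 6 };   // M = 2, lda = 3
+ *   double b[6];                 // will hold the 3x2 transpose, ldb = 2
+ *   dense_transpose_generic(2, 3, (const char*)a, 3, (char*)b, 2, sizeof(double));
+ *   // b now holds { 1, 4,  2, 5,  3, 6 }
+ */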
1800
+
1801
+
1802
+ /*
1803
+ * Create a transposed copy of this matrix.
1804
+ */
1805
+ static VALUE nm_transpose_new(VALUE self) {
1806
+ NMATRIX *self_m, *result, *result2;
1807
+ size_t sz;
1808
+ size_t* shape = ALLOC_N(size_t, 2);
1809
+ YALE_PARAM A, B;
1810
+ #ifdef BENCHMARK
1811
+ double t1, t2;
1812
+ #endif
1813
+
1814
+ UnwrapNMatrix( self, self_m );
1815
+
1816
+ // switch the dimensions
1817
+ shape[1] = self_m->storage->shape[0];
1818
+ shape[0] = self_m->storage->shape[1];
1819
+
1820
+ switch(self_m->stype) {
1821
+ case S_DENSE:
1822
+ result = nm_create(S_DENSE, create_dense_storage(self_m->storage->dtype, shape, 2, NULL, 0));
1823
+ dense_transpose_generic(
1824
+ self_m->storage->shape[0],
1825
+ self_m->storage->shape[1],
1826
+ ((DENSE_STORAGE*)(self_m->storage))->elements,
1827
+ self_m->storage->shape[1],
1828
+ ((DENSE_STORAGE*)(result->storage))->elements,
1829
+ result->storage->shape[1],
1830
+ nm_sizeof[self_m->storage->dtype]);
1831
+
1832
+ break;
1833
+ case S_YALE:
1834
+ YaleGetSize(sz, (YALE_STORAGE*)(self_m->storage)); // size of new matrix is going to be size of old matrix
1835
+ result = nm_create(S_YALE, create_yale_storage(self_m->storage->dtype, shape, 2, sz));
1836
+
1837
+ // TODO: Do we really need to initialize the whole thing? Or just the A portion?
1838
+ init_yale_storage((YALE_STORAGE*)(result->storage));
1839
+
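+ // NOTE: result2 feeds only the benchmark comparison below, which is itself
+ // commented out, so in any build it is allocated and initialized but never consumed.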
1840
+ result2 = nm_create(S_YALE, create_yale_storage(self_m->storage->dtype, shape, 2, sz));
1841
+ init_yale_storage((YALE_STORAGE*)(result2->storage));
1842
+
1843
+ A.ia = A.ja = ((YALE_STORAGE*)(self_m->storage))->ija;
1844
+ B.ia = B.ja = ((YALE_STORAGE*)(result->storage))->ija;
1845
+ A.a = ((YALE_STORAGE*)(self_m->storage))->a;
1846
+ B.a = ((YALE_STORAGE*)(result->storage))->a;
1847
+ A.diag = true;
1848
+
1849
+ #ifdef BENCHMARK
1850
+ t1 = get_time();
1851
+ #endif
1852
+
1853
+ // call the appropriate function pointer
1854
+ SparseTransposeFuncs[ self_m->storage->dtype ][ ((YALE_STORAGE*)(self_m->storage))->index_dtype ](shape[0], shape[1], A, B, true);
1855
+ #ifdef BENCHMARK
1856
+ t1 = get_time() - t1;
1857
+ /*
1858
+ t2 = get_time();
1859
+ transp(
1860
+ shape[0],
1861
+ shape[1],
1862
+ ((YALE_STORAGE*)(self_m->storage))->ija,
1863
+ ((YALE_STORAGE*)(self_m->storage))->ija,
1864
+ true,
1865
+ ((YALE_STORAGE*)(self_m->storage))->a,
1866
+ ((YALE_STORAGE*)(result2->storage))->ija,
1867
+ ((YALE_STORAGE*)(result2->storage))->ija,
1868
+ ((YALE_STORAGE*)(result2->storage))->a,
1869
+ true, // move
1870
+ ((YALE_STORAGE*)(self_m->storage))->index_dtype,
1871
+ self_m->storage->dtype
1872
+ );
1873
+
1874
+ t2 = get_time() - t2;
1875
+ fprintf(stderr, "t1: %f\nt2: %f\n", t1, t2);
1876
+ */
1877
+ #endif
1878
+
1879
+ break;
1880
+ default:
1881
+ rb_raise(rb_eNotImpError, "transpose for this type not implemented yet");
1882
+ }
1883
+
1884
+ return Data_Wrap_Struct(cNMatrix, MarkFuncs[result->stype], nm_delete, result);
1885
+ }
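+
+ /* Hypothetical usage: m.transpose returns a new matrix with the dimensions
+ * swapped, so for a 2x3 dense matrix m, m.transpose.shape would be [3, 2]. */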
1886
+
1887
+ //static VALUE nm_transpose_auto(VALUE self) {
1888
+ //
1889
+ //}
1890
+
1891
+ void Init_nmatrix() {
1892
+ /* Require Complex class */
1893
+ //rb_require("complex");
1894
+ //cComplex = rb_const_get( rb_cObject, rb_intern("Complex") );
1895
+
1896
+ /* Define NMatrix class */
1897
+ cNMatrix = rb_define_class("NMatrix", rb_cObject);
1898
+
1899
+ /* class methods */
1900
+ rb_define_singleton_method(cNMatrix, "__cblas_gemm__", nm_cblas_gemm, 13);
1901
+ rb_define_singleton_method(cNMatrix, "__cblas_gemv__", nm_cblas_gemv, 11);
1902
+
1903
+ rb_define_alloc_func(cNMatrix, nm_alloc);
1904
+ rb_define_method(cNMatrix, "initialize", nm_init, -1);
1905
+ // rb_define_singleton_method(cNMatrix, "new", nm_init, -1);
1906
+
1907
+
1908
+ rb_define_method(cNMatrix, "initialize_copy", nm_init_copy, 1);
1909
+ rb_define_method(cNMatrix, "initialize_cast_copy", nm_init_cast_copy, 2);
1910
+ rb_define_method(cNMatrix, "as_dtype", nm_cast_copy, 1);
1911
+
1912
+ /* methods */
1913
+ rb_define_method(cNMatrix, "dtype", nm_dtype, 0);
1914
+ rb_define_method(cNMatrix, "stype", nm_stype, 0);
1915
+ rb_define_method(cNMatrix, "cast", nm_scast_copy, 2);
1916
+
1917
+ rb_define_method(cNMatrix, "[]", nm_mref, -1);
1918
+ rb_define_method(cNMatrix, "[]=", nm_mset, -1);
1919
+ rb_define_method(cNMatrix, "rank", nm_rank, 0);
1920
+ rb_define_alias(cNMatrix, "dim", "rank");
1921
+ rb_define_method(cNMatrix, "shape", nm_shape, 0);
1922
+ rb_define_method(cNMatrix, "transpose", nm_transpose_new, 0);
1923
+ //rb_define_method(cNMatrix, "transpose!", nm_transpose_auto, 0);
1924
+
1925
+ rb_define_method(cNMatrix, "each", nm_each, 0);
1926
+
1927
+ rb_define_method(cNMatrix, "*", nm_ew_multiply, 1);
1928
+ rb_define_method(cNMatrix, "/", nm_ew_divide, 1);
1929
+ rb_define_method(cNMatrix, "+", nm_ew_add, 1);
1930
+ rb_define_method(cNMatrix, "-", nm_ew_subtract, 1);
1931
+ rb_define_method(cNMatrix, "==", nm_ew_eqeq, 1);
1932
+ rb_define_method(cNMatrix, "!=", nm_ew_neq, 1);
1933
+ rb_define_method(cNMatrix, "<=", nm_ew_leq, 1);
1934
+ rb_define_method(cNMatrix, ">=", nm_ew_geq, 1);
1935
+ rb_define_method(cNMatrix, "<", nm_ew_lt, 1);
1936
+ rb_define_method(cNMatrix, ">", nm_ew_gt, 1);
1937
+ rb_define_method(cNMatrix, "equal?", nm_eqeq, 1);
1938
+ rb_define_method(cNMatrix, "dot", nm_multiply, 1);
1939
+ rb_define_alias(cNMatrix, "equal?", "eql?");
1940
+
1941
+
1942
+ rb_define_method(cNMatrix, "capacity", nm_capacity, 0);
1943
+
1944
+ rb_define_method(cNMatrix, "__yale_ija__", nm_yale_ija, 0);
1945
+ rb_define_method(cNMatrix, "__yale_a__", nm_yale_a, 0);
1946
+ rb_define_method(cNMatrix, "__yale_size__", nm_yale_size, 0);
1947
+ rb_define_method(cNMatrix, "__yale_ia__", nm_yale_ia, 0);
1948
+ rb_define_method(cNMatrix, "__yale_ja__", nm_yale_ja, 0);
1949
+ rb_define_method(cNMatrix, "__yale_d__", nm_yale_d, 0);
1950
+ rb_define_method(cNMatrix, "__yale_lu__", nm_yale_lu, 0);
1951
+ rb_define_const(cNMatrix, "YALE_GROWTH_CONSTANT", rb_float_new(YALE_GROWTH_CONSTANT));
1952
+
1953
+
1954
+ cNVector = rb_define_class("NVector", cNMatrix);
1955
+
1956
+ // Special exceptions
1957
+ nm_eDataTypeError = rb_define_class("DataTypeError", rb_eStandardError);
1958
+ nm_eStorageTypeError = rb_define_class("StorageTypeError", rb_eStandardError);
1959
+
1960
+ nm_id_real = rb_intern("real");
1961
+ nm_id_imag = rb_intern("imag");
1962
+ nm_id_numer = rb_intern("numerator");
1963
+ nm_id_denom = rb_intern("denominator");
1964
+ nm_id_mult = rb_intern("*");
1965
+ nm_id_add = rb_intern("+");
1966
+ nm_id_multeq= rb_intern("*=");
1967
+
1968
+ nm_id_transpose = rb_intern("transpose");
1969
+ nm_id_no_transpose = rb_intern("no_transpose");
1970
+ nm_id_complex_conjugate = rb_intern("complex_conjugate");
1971
+
1972
+ nm_id_dense = rb_intern("dense");
1973
+ nm_id_list = rb_intern("list");
1974
+
1975
+ }
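+
+ /* Hypothetical sketch of the Ruby surface registered above (method names come
+ * from the rb_define_* calls; argument shapes are assumptions):
+ *
+ *   m.shape                   # nm_shape
+ *   m[0, 1]                   # nm_mref
+ *   m[0, 1] = 4.0             # nm_mset
+ *   m.dot(n)                  # matrix product via nm_multiply
+ *   m + n                     # elementwise add via nm_ew_add
+ *   m.cast(:yale, :float64)   # nm_scast_copy (two arguments)
+ */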
1976
+
1977
+ #endif